In [1]:
import numpy as np
from src.utils import *
from tqdm import tqdm
import logging

# NumPy ufuncs grouped by arity.
unary_operators = [
    np.sin,
    np.cos,
    np.exp,
    np.abs,
    np.log,
    np.tan,
]
binary_operators = [
    np.add,
    np.subtract,
    np.multiply,
    np.divide,
]

# Combined pool: unary operators first, then the binary ones.
operators = [*unary_operators, *binary_operators]

In [2]:
# Run configuration consumed by evolve(); every key below is looked up by name there.
conf = dict(
    # Population size and run length.
    NUM_POPULATION=100,
    NUM_GENERATIONS=600,
    MAX_DEPTH_INITIAL=7,
    # Housekeeping cadence (in generations).
    DEDUPE_INTERVAL=15,
    # Selection.
    ELITISM=False,
    ELITE_COUNT=3,
    TOURNAMENT_SUBSET_SIZE=3,
    TOURNAMENT_WINNER_SIZE=30,
    # Variation probabilities and number of breeding rounds per generation.
    XOVER=0.8,
    MUTATION=0.8,
    BREED_NEW=15,
    # Pruning thresholds.
    KILL_AGE=16,
    MAX_COMPLEXITY=150,
    # Constant-fitting settings.
    CONSTANT_FIT_INTERVAL=30,
    CONSTANT_FIT_ITERATION=6,
    # Operator pools handed to create_population().
    UNARY_OPERATORS=[np.sin, np.cos, np.exp, np.abs, np.log, np.tan],
    BINARY_OPERATORS=[np.add, np.subtract, np.multiply, np.divide],
)

In [3]:
def _reset_age_and_requeue(population, individuals):
    """Set ``age`` to 0 on each given individual and move it to the end of ``population``."""
    for individual in individuals:
        population.remove(individual)
        individual.age = 0
        population.append(individual)


def evolve(train_x,train_y,val_x,val_y, conf, logger=None, my_population=None):
    """Run the evolutionary loop and return the resulting population.

    Each generation ages the population, removes individuals via ``kill_eldest``,
    picks parents with ``tournament_selection``, appends crossover/mutation
    children, re-evaluates train and validation fitness, and resets the age of
    the current top ``ELITE_COUNT`` individuals. Every ``DEDUPE_INTERVAL``
    generations the population is deduplicated, simplified and pruned, and the
    early-stopping criterion (best ``fitness_val`` < 0.0001) is checked.
    Lower fitness is treated as better throughout.

    Args:
        train_x, train_y: Training data used for fitness assignment and mutation.
            ``train_x.shape[0]`` is forwarded to ``create_population`` and
            ``mutation_w_sa`` (presumably the number of input variables -- confirm
            against ``src.utils``).
        val_x, val_y: Validation data used for ``fitness_val``.
        conf: Mapping with the keys NUM_POPULATION, NUM_GENERATIONS,
            MAX_DEPTH_INITIAL, DEDUPE_INTERVAL, ELITISM, ELITE_COUNT,
            TOURNAMENT_SUBSET_SIZE, TOURNAMENT_WINNER_SIZE, XOVER, MUTATION,
            BREED_NEW, KILL_AGE, MAX_COMPLEXITY, UNARY_OPERATORS, BINARY_OPERATORS.
        logger: Logger receiving progress messages. Defaults to
            ``logging.getLogger(__name__)`` when omitted.
        my_population: Existing population to continue from, or ``None`` to
            create a fresh one. A passed-in list is modified in place (children
            are appended, elites re-queued), so use the return value afterwards.

    Returns:
        The final population as returned by ``deduplicate_population``. The best
        individual by validation fitness seen during the run is only logged.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    NUM_POPULATION = conf["NUM_POPULATION"]
    NUM_GENERATIONS = conf["NUM_GENERATIONS"]
    MAX_DEPTH_INITIAL = conf["MAX_DEPTH_INITIAL"]
    DEDUPE_INTERVAL = conf["DEDUPE_INTERVAL"]
    ELITISM = conf["ELITISM"]
    ELITE_COUNT = conf["ELITE_COUNT"]
    TOURNAMENT_SUBSET_SIZE = conf["TOURNAMENT_SUBSET_SIZE"]
    TOURNAMENT_WINNER_SIZE = conf["TOURNAMENT_WINNER_SIZE"]
    XOVER = conf["XOVER"]
    MUTATION = conf["MUTATION"]
    BREED_NEW = conf["BREED_NEW"]
    KILL_AGE = conf["KILL_AGE"]
    MAX_COMPLEXITY = conf["MAX_COMPLEXITY"]
    UNARY_OPERATORS = conf["UNARY_OPERATORS"]
    BINARY_OPERATORS = conf["BINARY_OPERATORS"]
    # Periodic constant fitting (CONSTANT_FIT_INTERVAL / CONSTANT_FIT_ITERATION in conf)
    # is currently disabled, so those keys are not read here.

    logger.info(f"Population is being created with a size: {NUM_POPULATION}")
    # Identity check: `== None` would be evaluated element-wise on array-like populations.
    if my_population is None:
        my_population = create_population(NUM_POPULATION,MAX_DEPTH_INITIAL,train_x.shape[0], UNARY_OPERATORS, BINARY_OPERATORS)
        logger.info("Population has been created.")
        logger.info("Assigning individual fitness values...")
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    logger.info(f"Population size: {len(my_population)}")
    logger.info(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
    logger.info(f"Population mean complexity: {calculate_mean_complexity(my_population)}")

    # Log the five best individuals of the starting population.
    for i, individual in enumerate(top_n_individuals(my_population, 5), 1):
        logger.info(f"Top {i}: Fitness = {individual.fitness:.5f} Fitness_val = {individual.fitness_val:.5f} Genome = {individual.genome}")

    # NOTE(review): this repeats the evaluation done just above and looks redundant --
    # confirm the assign_* helpers are idempotent, then drop one of the two passes.
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    best_result = top_n_individuals(my_population, 1)[0]

    for generation in tqdm(range(NUM_GENERATIONS)):
        logger.info(f"Starting generation {generation + 1}/{NUM_GENERATIONS}")
        num_xover=0
        num_mut=0
        num_succesfull_mut=0
        age_population(my_population)

        # Kill eldest individuals
        kill_eldest(my_population, KILL_AGE)

        # best_ind holds indices into my_population; children are only appended
        # below, so these indices stay valid for the whole breeding loop.
        best_ind = tournament_selection(my_population,TOURNAMENT_SUBSET_SIZE,TOURNAMENT_WINNER_SIZE,ELITISM=ELITISM, elite_count=ELITE_COUNT)

        for _ in range(BREED_NEW):
            if random.random() < XOVER:
                # Cross two randomly chosen tournament winners and keep both children.
                parent1 = my_population[best_ind[np.random.choice(len(best_ind))]]
                parent2 = my_population[best_ind[np.random.choice(len(best_ind))]]
                child1, child2 = crossover(parent1,parent2)
                my_population.append(child1)
                my_population.append(child2)
                num_xover+=1

            if random.random() < MUTATION:
                # Mutate a randomly chosen tournament winner; keep the child only on success.
                parent = my_population[best_ind[np.random.choice(len(best_ind))]]
                child, success = mutation_w_sa(parent, train_x.shape[0], train_x, train_y)
                num_mut+=1
                if success:
                    my_population.append(child)
                    num_succesfull_mut+=1

        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)

        logger.debug(f"xover: {num_xover}, mut: {num_mut}, succesfull mut: {num_succesfull_mut}")
        simplify_constant_population(my_population)

        # Track the best validation fitness among the current elites, then reset their age.
        elites = top_n_individuals(my_population, ELITE_COUNT)
        if best_result.fitness_val > elites[0].fitness_val:
            best_result = elites[0]
            logger.info(f"New best result found: {best_result}")
        _reset_age_and_requeue(my_population, elites)

        # Deduplicate, simplify and prune every DEDUPE_INTERVAL generations.
        if generation % DEDUPE_INTERVAL == DEDUPE_INTERVAL-1:
            init_population = len(my_population)
            my_population = deduplicate_population(my_population)
            dedup_population = len(my_population)
            simplify_operation_population(my_population)
            simplified_operations = len(my_population)
            kill_constant(my_population)
            no_constant = len(my_population)
            kill_complex(my_population, MAX_COMPLEXITY)
            no_complex = len(my_population)

            logger.info(f"Initial: {init_population}, Deduplicated: {init_population-dedup_population}, Operation simplified: {dedup_population-simplified_operations}, Constant: {simplified_operations-no_constant}, Complex: {no_constant - no_complex}, Final: {no_complex}")
            logger.info(f"Population mean fitness: {calculate_mean_fitness(my_population)}")

            # The pruning above may have changed who the elites are.
            _reset_age_and_requeue(my_population, top_n_individuals(my_population, ELITE_COUNT))

            best_5 = top_n_individuals(my_population, 5)
            for rank, ind in enumerate(best_5):
                logger.info(f"{rank}. Fitness: {ind.fitness:.5f}, Fitness_val = {ind.fitness_val:.5f}, Genome: {ind.genome}, Age: {ind.age}, Complexity: {ind.genome.complexity}")

            # Early stopping is only evaluated on these housekeeping generations.
            if best_5[0].fitness_val < 0.0001:
                logger.info(f"Early stopping triggered at generation {generation + 1} due to best fitness = {best_5[0].fitness_val:.5f}")
                break

        # Cap the population at twice the nominal size, keeping the best individuals.
        if len(my_population) > NUM_POPULATION*2:
            my_population = top_n_individuals(my_population,NUM_POPULATION*2)

    my_population = deduplicate_population(my_population)
    assign_population_fitness_train(my_population,train_x,train_y)

    # Final report: best individual seen during the run, then the top 15 of the final population.
    final_top = top_n_individuals(my_population, 15)
    logger.info(f"Best individual: Fitness_val = {best_result.fitness_val}: Genome = {best_result.genome}")
    for i, individual in enumerate(final_top, 1):
        logger.info(f"Top {i}: Fitness = {individual.fitness} Fitness_val = {individual.fitness_val} Genome = {individual.genome}")

    # Values are interpolated into the message: passing them as extra positional
    # arguments without a %-placeholder makes logging fail to format the record.
    logger.info(f"Mean fitness of the population: {calculate_mean_fitness(my_population)}")
    logger.info(f"Population size: {len(my_population)}")
    logger.info(f"Population mean complexity: {calculate_mean_complexity(my_population)}")

    return my_population

In [4]:
def evolve_and_migrate(dataset_id):
    """Evolve four populations on one dataset in three batches with migration in between.

    Loads ``data/problem_{dataset_id}.npz``, splits it with ``split_dataset(x, y, 0.8)``
    and logs to ``evolve_and_migrate_{dataset_id}.log``. Batch 1 starts four fresh
    populations (MAX_COMPLEXITY 100); after migrating 1<->2 and 3<->4, batch 2
    continues them (MAX_COMPLEXITY 150); after migrating 1<->4 and 2<->3, batch 3
    continues them again (MAX_COMPLEXITY 200).

    Note:
        The module-level ``conf`` dict is modified in place (NUM_GENERATIONS and
        MAX_COMPLEXITY), so later cells using ``conf`` see the values left here.

    Args:
        dataset_id: Identifier used in the data file name and the log file name.

    Returns:
        Tuple ``(my_pop_1, my_pop_2, my_pop_3, my_pop_4)`` of the final populations.
    """
    MIGRATION_SIZE = 15  # third argument passed to migration()

    # logging.basicConfig() does nothing when the root logger already has handlers,
    # so a second call (e.g. for another dataset_id in the same kernel) would keep
    # writing to the first log file. Drop the old root handlers before configuring.
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()
    logging.basicConfig(
        filename=f"evolve_and_migrate_{dataset_id}.log",  # Log file
        level=logging.INFO,        # Logging level
        format="%(asctime)s - %(levelname)s - %(message)s",  # Log format
        datefmt="%Y-%m-%d %H:%M:%S",  # Date format
    )
    logger = logging.getLogger(__name__)

    problem = np.load(f"data/problem_{dataset_id}.npz")
    x = problem["x"]
    y = problem["y"]
    logger.info(f"x.shape: {x.shape}")
    logger.info(f"y.shape: {y.shape}")
    x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
    logger.info(f"x_train.shape: {x_train.shape}")
    logger.info(f"x_val.shape: {x_val.shape}")
    logger.info(f"y_train.shape: {y_train.shape}")
    logger.info(f"y_val.shape: {y_val.shape}")

    print("x_train.shape: ", x_train.shape)
    print("x_val.shape: ", x_val.shape)
    print("y_train.shape: ", y_train.shape)
    print("y_val.shape: ", y_val.shape)

    def run_batch(populations):
        """Evolve each population in order (None starts a fresh one) and return the results."""
        evolved = []
        for index, population in enumerate(populations, 1):
            logger.info(f"my_pop_{index} training started")
            evolved.append(evolve(x_train,y_train,x_val,y_val,conf,logger,population))
        return evolved

    conf["NUM_GENERATIONS"]=150
    conf["MAX_COMPLEXITY"]=100
    logger.info("First batch of training started")
    my_pop_1, my_pop_2, my_pop_3, my_pop_4 = run_batch([None, None, None, None])

    logger.info("Migration between 1-2 and 3-4")
    my_pop_1, my_pop_2 = migration(my_pop_1, my_pop_2, MIGRATION_SIZE)
    my_pop_3, my_pop_4 = migration(my_pop_3, my_pop_4, MIGRATION_SIZE)

    conf["MAX_COMPLEXITY"]=150
    logger.info("Second batch of training started")
    my_pop_1, my_pop_2, my_pop_3, my_pop_4 = run_batch([my_pop_1, my_pop_2, my_pop_3, my_pop_4])

    logger.info("Migration between 1-4 and 2-3")
    my_pop_1, my_pop_4 = migration(my_pop_1, my_pop_4, MIGRATION_SIZE)
    my_pop_2, my_pop_3 = migration(my_pop_2, my_pop_3, MIGRATION_SIZE)

    conf["MAX_COMPLEXITY"]=200
    logger.info("Third batch of training started")
    my_pop_1, my_pop_2, my_pop_3, my_pop_4 = run_batch([my_pop_1, my_pop_2, my_pop_3, my_pop_4])

    return my_pop_1, my_pop_2, my_pop_3, my_pop_4


In [None]:
# Full three-batch run with migration on problem 3; progress goes to evolve_and_migrate_3.log.
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(dataset_id=3)

x_train.shape:  (3, 4000)
x_val.shape:  (3, 1000)
y_train.shape:  (4000,)
y_val.shape:  (1000,)


 24%|██▍       | 36/150 [06:30<21:38, 11.39s/it]

In [None]:
# Problem 3, restricted to the first 1000 samples: load, split with ratio 0.8, evolve two populations.
problem = np.load("data/problem_3.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x[:,:1000],y[:1000],0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
run_logger = logging.getLogger(__name__)
my_pop_3_1 = evolve(x_train,y_train,x_val,y_val,conf,run_logger)
my_pop_3_2 = evolve(x_train,y_train,x_val,y_val,conf,run_logger)

In [None]:
# Problem 4: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_4.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_4 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))


In [None]:

# Problem 5: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_5.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_5 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))


In [None]:

# Problem 6: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_6.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_6 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 7: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_7.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_7 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))


In [None]:

# Problem 8: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_8.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_8 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))


In [None]:
# Problem 0: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_0.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_0 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 1: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_1.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_1 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 2: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_2.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_2 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 3: load the full dataset and split it; the following cells evolve on this split.
problem = np.load("data/problem_3.npz")
x, y = problem["x"], problem["y"]
print(f"x.shape: {x.shape}")
x_train, y_train, x_val, y_val = split_dataset(x, y, 0.8)
print(f"y.shape: {y.shape}")
print(f"x_train.shape:  {x_train.shape}")
print(f"x_val.shape:  {x_val.shape}")
print(f"y_train.shape:  {y_train.shape}")
print(f"y_val.shape:  {y_val.shape}")

In [None]:

# First problem-3 population: 200 generations with the operator set currently stored in conf.
conf["NUM_GENERATIONS"]=200
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_3_1 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:

# Second problem-3 population: 200 generations with a reduced unary operator set (tan, log, exp).
unary_operators=[np.tan, np.log, np.exp]
binary_operators=[np.add, np.multiply, np.divide, np.subtract]
conf["NUM_GENERATIONS"]=200
conf["BINARY_OPERATORS"] = binary_operators
conf["UNARY_OPERATORS"] = unary_operators
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_3_2 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))



In [11]:
# Work on deep copies so the original problem-3 populations stay untouched by migration.
migration_pop_1, migration_pop_2 = copy.deepcopy(my_pop_3_1), copy.deepcopy(my_pop_3_2)

In [None]:
# Population sizes before migration.
print(f"len(migration_pop_1): {len(migration_pop_1)}")
print(f"len(migration_pop_2): {len(migration_pop_2)}")

In [None]:
# Migrate between the two copied populations (size argument 20).
# NOTE: this cell rebinds its own inputs, so re-running it migrates again.
migration_pop_1, migration_pop_2 = migration(migration_pop_1, migration_pop_2, 20)

# Restore the full operator sets in conf for the runs that follow.
unary_operators = [np.sin, np.cos, np.exp, np.abs, np.log, np.tan]
binary_operators = [np.add, np.subtract, np.multiply, np.divide]
conf.update(BINARY_OPERATORS=binary_operators, UNARY_OPERATORS=unary_operators)

In [None]:
# Population sizes after migration.
print(f"len(migration_pop_1): {len(migration_pop_1)}")
print(f"len(migration_pop_2): {len(migration_pop_2)}")

In [None]:
# Continue evolving the migrated population. The sixth positional argument of evolve() is the
# logger, so the population must be passed as my_population rather than in the logger's position.
migration_pop_1 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__),my_population=migration_pop_1)

In [None]:
# Continue evolving the migrated population. The sixth positional argument of evolve() is the
# logger, so the population must be passed as my_population rather than in the logger's position.
migration_pop_2 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__),my_population=migration_pop_2)

In [None]:
# Problem 4: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_4.npz")
x = problem["x"]
# Reload the targets too; otherwise `y` left over from the previously loaded problem is paired with this `x`.
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_4 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 5: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_5.npz")
x = problem["x"]
# Reload the targets too; otherwise `y` left over from the previously loaded problem is paired with this `x`.
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_5 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 6: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_6.npz")
x = problem["x"]
# Reload the targets too; otherwise `y` left over from the previously loaded problem is paired with this `x`.
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_6 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 7: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_7.npz")
x = problem["x"]
# Reload the targets too; otherwise `y` left over from the previously loaded problem is paired with this `x`.
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_7 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))

In [None]:
# Problem 8: load, split with ratio 0.8, evolve one population.
problem = np.load("data/problem_8.npz")
x = problem["x"]
# Reload the targets too; otherwise `y` left over from the previously loaded problem is paired with this `x`.
y = problem["y"]
print("x.shape:", x.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
# evolve() takes a logger as its sixth positional argument; calling it without one raises TypeError.
my_pop_8 = evolve(x_train,y_train,x_val,y_val,conf,logging.getLogger(__name__))