## Patrones Paralelos ~ Algoritmos Genéticos

In [6]:
import random
import numpy as np
import pandas as pd 
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt
from pathlib import Path
import multiprocessing
import time

In [2]:
# Load the dataset ordered by the target column, then pick the feature
# columns and the target column.
data = pd.read_csv('interpretabilidad.csv').sort_values(by=['interpretability'])
X = data[['learning_rate', 'warmup_epochs', 'mask_period', 'mem', 'lr_reg', 'lambda_reg']]
Y = data[['interpretability']]

# The first 336 rows (in sorted order) form the training split;
# every remaining row goes to the test split.
X_train, y_train = X.iloc[:336], Y.iloc[:336]
X_test, y_test = X.iloc[336:], Y.iloc[336:]

In [34]:
def fitness(x_tr, y_tr):
    """Fit a random forest on the given rows and score it on the test split.

    Args:
        x_tr: Training features; must expose ``.values`` (e.g. a DataFrame).
        y_tr: Training targets; must expose ``.values``. The values are
            flattened to one dimension before fitting.

    Returns:
        ``1.0 - MSE`` between the predictions for the module-level ``X_test``
        and the first column of the module-level ``y_test``. The MSE is
        subtracted from one so that a higher value means a better solution.
    """
    reg = RandomForestRegressor(n_estimators=300, verbose=False)
    reg.fit(x_tr.values, y_tr.values.ravel())

    # Predict the whole test split in a single call instead of calling
    # ``predict`` once per row; the per-row predictions are the same.
    y_pred = reg.predict(X_test.values)
    y_true = y_test.iloc[:, 0].values

    return 1.0 - mse(y_true, y_pred)

def evaluate_chromosome(chromosome):
    """Return the fitness of the training rows switched on by ``chromosome``.

    Positions where ``chromosome`` equals 1 are used as positional row
    indices into the module-level ``X_train`` and ``y_train``; the selected
    rows are passed to ``fitness``.
    """
    selected_rows = np.where(chromosome == 1)
    features = X_train.iloc[selected_rows]
    targets = y_train.iloc[selected_rows]
    return fitness(features, targets)

def target_initial_population(n_chromosomes, chromosome_size, queue):
    """Build and evaluate random chromosomes, then publish them on ``queue``.

    Intended as the ``target`` of a worker process.

    Args:
        n_chromosomes: Number of chromosomes to generate.
        chromosome_size: Number of 0/1 genes in each chromosome.
        queue: Object with a ``put`` method; receives a single two-element
            list ``[chromosomes, fitness_values]``.
    """
    # Use a generator created inside the worker. The global ``np.random``
    # state is copied into forked child processes, so drawing from it here
    # can make every worker produce the same chromosomes.
    rng = np.random.default_rng()

    my_chromosomes = []
    f_rates = []
    for _ in range(n_chromosomes):
        chromosome = rng.integers(2, size=chromosome_size)
        my_chromosomes.append(chromosome)
        f_rates.append(evaluate_chromosome(chromosome))
    queue.put([my_chromosomes, f_rates])
    


class geneticFeatureSelection(object):
    """Genetic algorithm over binary chromosomes.

    Each chromosome is a 0/1 vector. Positions equal to 1 are used as
    positional row indices into the module-level ``X_train``/``y_train``;
    the selected rows train a random forest whose score on the module-level
    ``X_test``/``y_test`` is the chromosome's fitness.
    """

    def __init__(self,
                 n_hypotheses=8,
                 r_crossover=0.4,
                 m_rate=0.2,
                 chromosome_size=336,
                 crossover_mask=None,
                 n_populations=200):
        '''
        n_hypotheses: the number of hypotheses included in the population
        r_crossover: the fraction of the population that will be replaced by
            the crossover step
        m_rate: the mutation rate
        chromosome_size: the number of genes in each chromosome
        crossover_mask: 0/1 sequence; 0 takes the gene from the first parent,
            anything else from the second. When omitted, a mask is built for
            ``chromosome_size`` whose first half is 0 and second half is 1.
        n_populations: stored on the instance; not read by any method of
            this class
        '''
        self.n_hypotheses = n_hypotheses
        self.r_crossover = r_crossover
        self.m_rate = m_rate
        self.chromosome_size = chromosome_size
        if crossover_mask is None:
            # Build a fresh list per instance (no shared mutable default) and
            # size it to the chromosome instead of a fixed length.
            half = chromosome_size // 2
            crossover_mask = [0 if i < half else 1 for i in range(chromosome_size)]
        self.crossover_mask = crossover_mask
        self.n_populations = n_populations
        self.population = []
        self.fitness = []

    def fitness_function(self, x_tr, y_tr):
        """Fit a random forest on the given rows and score it on the test split.

        Returns ``1.0 - MSE`` between the predictions for the module-level
        ``X_test`` and the first column of the module-level ``y_test``; the
        MSE is subtracted from one so that higher means better.
        """
        reg = RandomForestRegressor(n_estimators=300, verbose=False)
        reg.fit(x_tr.values, y_tr.values.ravel())

        # One vectorized prediction call instead of one call per test row.
        y_pred = reg.predict(X_test.values)
        y_true = y_test.iloc[:, 0].values

        return 1.0 - mse(y_true, y_pred)

    def evaluate_chromosome(self, chromosome):
        """Append the fitness of ``chromosome`` to ``self.fitness``."""
        rows = np.flatnonzero(np.asarray(chromosome) == 1)
        self.fitness.append(
            self.fitness_function(X_train.iloc[rows], y_train.iloc[rows]))

    def init_population(self):
        '''
        Create the initial population serially: append ``n_hypotheses``
        random chromosomes and their fitness values.
        '''
        for _ in range(self.n_hypotheses):
            self.population.append(
                np.random.randint(2, size=self.chromosome_size))
            self.evaluate_chromosome(self.population[-1])

    def init_population_parallel(self, n_workers=2):
        """Create the initial population using ``n_workers`` processes.

        The ``n_hypotheses`` chromosomes are split as evenly as possible
        between the workers, so the total is correct even when it is not a
        multiple of ``n_workers``. The result replaces ``self.population``
        and ``self.fitness`` and is stored as plain lists, like the serial
        initializer does, so that later steps can append to them.

        Raises:
            ValueError: if ``n_workers`` is smaller than 1.
        """
        if n_workers < 1:
            raise ValueError("n_workers must be at least 1")

        base, extra = divmod(self.n_hypotheses, n_workers)
        sizes = [base + (1 if i < extra else 0) for i in range(n_workers)]

        workers = []
        for size in sizes:
            if size == 0:
                continue  # nothing to generate; do not start an idle process
            queue = multiprocessing.Queue()
            process = multiprocessing.Process(
                target=target_initial_population,
                args=[size, self.chromosome_size, queue])
            workers.append((process, queue))

        for process, _ in workers:
            process.start()

        population = []
        fitness = []
        for process, queue in workers:
            # Read the result before joining: a process that has put data on
            # a queue may not exit until that data has been consumed.
            chromosomes, rates = queue.get()
            population.extend(chromosomes)
            fitness.extend(rates)
            process.join()

        self.population = population
        self.fitness = fitness

    def _keep_best(self, count):
        """Keep only the ``count`` fittest chromosomes, best first."""
        indexes = np.argsort(self.fitness)[::-1][:count]
        self.fitness = np.array(self.fitness)[indexes].tolist()
        self.population = np.array(self.population)[indexes].tolist()

    def select_k_best_mutation(self):
        """Shrink the population back to its ``n_hypotheses`` fittest members."""
        self._keep_best(self.n_hypotheses)

    def select_k_best_crossover(self):
        """Drop the least fit ``r_crossover`` fraction of the population."""
        size = len(self.fitness)
        self._keep_best(size - int(self.r_crossover * size))

    def crossover_step(self, chromosome_1, chromosome_2):
        """Combine two parents gene by gene according to ``crossover_mask``.

        Returns a list with one gene per mask position: the gene of
        ``chromosome_1`` where the mask is 0, otherwise that of
        ``chromosome_2``.
        """
        return [chromosome_1[i] if bit == 0 else chromosome_2[i]
                for i, bit in enumerate(self.crossover_mask)]

    def crossover(self):
        """Refill the population up to ``n_hypotheses`` with evaluated offspring.

        Raises:
            ValueError: if offspring are needed but fewer than two
                chromosomes are available as parents.
        """
        missing = self.n_hypotheses - len(self.population)
        if missing > 0 and len(self.population) < 2:
            raise ValueError(
                "crossover needs at least two chromosomes in the population")
        for _ in range(missing):
            first, second = np.random.choice(
                len(self.population), 2, replace=False)
            child = np.array(self.crossover_step(
                self.population[first], self.population[second]))
            self.population.append(child)
            self.evaluate_chromosome(child)

    def _mutate(self, chromosome):
        """Return a copy of ``chromosome`` with one randomly chosen gene flipped."""
        mutant = np.array(chromosome)
        if mutant.size:
            position = np.random.randint(mutant.size)
            mutant[position] = 1 - mutant[position]
        return mutant

    def mutation(self):
        """Append ``int(len(population) * m_rate)`` evaluated mutants.

        Each mutant is a copy of a randomly chosen chromosome with a single
        gene flipped, so it actually differs from its parent.
        """
        for _ in range(int(len(self.population) * self.m_rate)):
            parent = self.population[np.random.randint(len(self.population))]
            new_chromosome = self._mutate(parent)
            self.population.append(new_chromosome)
            self.evaluate_chromosome(new_chromosome)

    def run(self):
        """Build the initial population in parallel and print the elapsed time.

        ``init_population`` is the serial alternative to the call used here.
        """
        s = time.perf_counter()
        self.init_population_parallel()
        t = time.perf_counter()
        print(f"Programa terminado en {t-s} segundos")
        

        
if __name__ == "__main__":
    # Only start the experiment when this file is executed directly: the run
    # launches worker processes, and a start method that re-imports the main
    # module would otherwise launch the experiment again in every worker.
    my_experiment = geneticFeatureSelection()
    my_experiment.run()

Programa terminado en 8.470049581999774 segundos
