In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from statsmodels.regression.linear_model import OLS
import math
import time
import warnings
import re

<h1>1. Preprocesamiento</h1>

<h2>1.1 Normalización</h2>

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
warnings.filterwarnings('ignore')

In [3]:
df_0 = pd.read_csv('Data\Concrete_Data_Yeh.csv')

df_p = preprocessing.normalize(df_0, axis=0)
df = pd.DataFrame(df_p, columns=df_0.columns)
df.describe()

Unnamed: 0,cement,slag,flyash,water,superplasticizer,coarseaggregate,fineaggregate,age,csMPa
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,0.029208,0.020274,0.020141,0.030946,0.022451,0.03106,0.030993,0.018259,0.028241
std,0.010856,0.023672,0.023786,0.00364,0.021616,0.002482,0.003212,0.02526,0.013172
min,0.010596,0.0,0.0,0.020759,0.0,0.025571,0.023798,0.0004,0.001837
25%,0.019984,0.0,0.0,0.028105,0.0,0.029754,0.029285,0.002799,0.018694
50%,0.028349,0.006036,0.0,0.031531,0.023158,0.030903,0.03123,0.011197,0.027158
75%,0.036359,0.03922,0.043969,0.032724,0.036908,0.032863,0.033013,0.022393,0.036375
max,0.056096,0.098607,0.074372,0.042098,0.116515,0.036554,0.039768,0.145956,0.065126


<h2>1.2 Selección de por GA con AIC</h2>

<h3>1.2.1 Definición de función a optimizar</h3>

In [4]:
## Definición funcion a optimizar

def aic_criterion(df, var_objetivo):
    
    n_var = len(df.drop(columns=[var_objetivo]).columns)
    n = len(df)
    explicativas = list(df.drop(columns=[var_objetivo]).columns)
    
    y = df[var_objetivo]
    X = df[explicativas]
    
    X_val, X_other, y_val, y_other = train_test_split(X, y, test_size=0.80, random_state=42, shuffle=False)
    X_train, X_test, y_train, y_test = train_test_split(X_other, y_other, test_size=0.40, shuffle=False)
    
    del X_other
    del y_other
    
    if X_train.shape[1] > 0:
        reg = OLS(y_train, X_train).fit()
        return(reg.aic)
    else:
        return(10**10)

In [165]:
## Definición funcion a optimizar

def residuales_f(df, df_pend, var_objetivo):
    
    n_var = len(df.drop(columns=[var_objetivo]).columns)
    explicativas = list(df.drop(columns=[var_objetivo]).columns)
    
    y = df[var_objetivo]
    X = df[explicativas]
    
    X_val, X_other, y_val, y_other = train_test_split(X, y, test_size=0.80, random_state=42, shuffle=False)
    X_train, X_test, y_train, y_test = train_test_split(X_other, y_other, test_size=0.40, shuffle=False)
    
    del X_other
    del y_other
    
    variables = list(df_pend.index)
    valores = list(df_pend)

    if X_train.shape[1] > 0:
        
        ecuacion = []
        for i in range(len(variables)):
            ecuacion = ecuacion + ['(' + str(valores[i]) + "*df_val['" + str(variables[i]) + "']) +" ]
        ecuacion = ' '.join(ecuacion)[:-2]

        df_val = pd.concat([X_val, y_val], axis=1)
        a = var_objetivo + '_est'
        df_val[a] = eval(ecuacion)
        df_val['residuos'] = df_val[a] - df_val[var_objetivo]

        res_2 = sum(df_val['residuos'].apply(lambda x: x**2)) / len(df_val)

        return(res_2)
    else:
        return(10**10)
    


<h3>1.2.2 Definición de operadores de cruce y mutación</h3>

In [72]:
## Generar operadores de cruce y mutacion

def cruce(cromosoma_0, cromosoma_1, tipo_cruce):
    
    rng = np.random.default_rng()
    largo_cromosoma = len(cromosoma_0)
    rango_cromosoma = range(largo_cromosoma)
    
    if tipo_cruce == 'punto unico':
        
        punto = rng.choice(rango_cromosoma, size=1, replace=False)[0]
        
        descendencia_0 = np.concatenate((cromosoma_0[:punto], cromosoma_1[punto:]), axis = 0)
        descendencia_1 = np.concatenate((cromosoma_1[:punto], cromosoma_0[punto:]), axis = 0)
        
        return(descendencia_0, descendencia_1)
    
    elif tipo_cruce == 'dos puntos':
        
        puntos = rng.choice(rango_cromosoma, size=2, replace=False)
        punto_0 = min(puntos[0], puntos[1])
        punto_1 = max(puntos[0], puntos[1])
        
        descendencia_0 = np.concatenate((cromosoma_0[:punto_0]
                                         , cromosoma_1[punto_0:punto_1]
                                         , cromosoma_0[punto_1:]), axis = 0)
        descendencia_1 = np.concatenate((cromosoma_1[:punto_0]
                                         , cromosoma_0[punto_0:punto_1]
                                         , cromosoma_1[punto_1:]), axis = 0)
        
        return(descendencia_0, descendencia_1)
    
    elif tipo_cruce == 'uniforme':

        padre_0 = np.random.randint(2,size=(len(cromosoma_0), 1))
        padre_1 = (padre_0 - 1) * (- 1)
        
        descendencia_0 = [(padre_0[i] * cromosoma_0[i])[0] + (padre_1[i] * cromosoma_1[i])[0] for i in range(len(cromosoma_0))]
        descendencia_1 = [(padre_0[i] * cromosoma_1[i])[0] + (padre_1[i] * cromosoma_0[i])[0] for i in range(len(cromosoma_0))]
        
        return(descendencia_0, descendencia_1)

    
    
def mutacion(cromosoma_0, df, tipo_dato):
    
    rng = np.random.default_rng()
    largo_cromosoma = len(cromosoma_0)
    rango_cromosoma = range(largo_cromosoma)
    
    gen_mutacion = rng.choice(rango_cromosoma, size=1, replace=False)[0]
    
    if tipo_dato == 'binario':
        
        if cromosoma_0[gen_mutacion] == 1:
            nuevo_gen = 0
        else:
            nuevo_gen = 1
    
    elif tipo_dato == 'flotante':
        
        std_gen = np.std(np.array(df.iloc[:, gen_mutacion]))
        
        nuevo_gen = cromosoma_0[gen_mutacion] + np.random.normal(0, std_gen, 1)
        
    cromosoma_0 = np.concatenate((cromosoma_0[:gen_mutacion]
                                  ,np.array([nuevo_gen])[0]
                                  ,cromosoma_0[gen_mutacion+1:]), axis = 0)
    
    return(cromosoma_0)

<h3>1.2.3 Definición de estrategias de selección</h3>

In [175]:
## Generar funciones de seleccion

def seleccion(df, factor_seleccion, tipo_seleccion, tamano_torneo, tamano_elitismo):
    
    df['seleccionado'] = 0
    
    df_c = df[df['clase']=='C'].reset_index(drop = True)
    df_p = df[df['clase']=='P'].reset_index(drop = True)
    
    if tipo_seleccion == 'ruleta':
        
        df_c['dummy'] = 0
        max_fitness = max(df_c['fitness'])
        min_fitness= min(df_c['fitness'])
        
        if (min_fitness < 0) & (max_fitness >= 0):
            df_c['fitness_escalado'] = (df_c['fitness'] - max_fitness - 1) * (-1)
        elif min_fitness < 0:
            df_c['fitness_escalado'] = df_c['fitness'] * (-1)
        else:
            df_c['fitness_escalado'] = df_c['fitness']
            
            
        total_aic_esc = sum(df_c['fitness_escalado'])
        df_c['prob'] = df_c['fitness_escalado'] / total_aic_esc
        df_c['prob_acum'] = df_c.groupby(['dummy'])['prob'].cumsum()
        df_c['prob_acum_lag'] = df_c['prob_acum'].shift(1).fillna(0)
            
        while (sum(df_c['seleccionado']) / len(df_c)) < factor_seleccion:
                
            rand = np.random.uniform(0, 1)
            df_c['seleccionado'] =  df_c.apply(lambda x: 1 if ((x.prob_acum_lag < rand) 
                                                           & (rand <= x.prob_acum)) else x.seleccionado, axis = 1)

        df_c = df_c.drop(columns=['dummy','fitness_escalado','prob','prob_acum','prob_acum_lag'])
                
    elif tipo_seleccion == 'torneo':
        
        n = len(df_c) - 1
        df_c['fila'] = df_c.index
        
        while (sum(df_c['seleccionado']) / len(df_c)) < factor_seleccion:
                
            rand = list(np.random.choice(np.array(list(df_c[df_c['seleccionado']!=1].index)), tamano_torneo))
            
            df_c['en_torneo'] = df_c.apply(lambda x: 1 if x.fila in rand else 0, axis = 1)
            df_c['min_fitness'] = min(df_c[df_c['en_torneo'] == 1]['fitness'])
            df_c['seleccionado'] =  df_c.apply(lambda x: 1 if ((x.min_fitness == x.fitness)
                                                               & (x.en_torneo == 1)) else x.seleccionado, axis = 1)

        df_c = df_c.drop(columns=['fila','en_torneo','min_fitness'])
    
    if tamano_elitismo > 0:
        df_p['dummy'] = 0
        df_p['orden'] = df_p.sort_values(['fitness']).groupby(['dummy']).cumcount() + 1
        
        df_p['seleccionado'] = df_p.apply(lambda x: 1 if x.orden <= tamano_elitismo else x.seleccionado, axis = 1)
        
        df_p = df_p.drop(columns=['dummy','orden'])
        
    df_final = df_c.append(df_p).reset_index(drop = True)
        
    return(df_final)

<h3>1.2.4 Función Conjunta Final</h3>

In [8]:
# Generar función iterable

def ga_optimizacion(df
                    , var_objetivo
                    , df_parametros
                    , tamano_campeones
                    , factor_mutacion
                    , tipo_dato
                    , factor_seleccion
                    , tipo_seleccion
                    , tamano_torneo
                    , tamano_elitismo
                    , tipo_cruce):
    # Parametros
    
    df_param = df_parametros.copy()
    parametros = list(df_param.columns)  
    
    df_resultados = pd.DataFrame([], columns=['iteracion'
                                              , 'media_fitness'
                                              , 'nro_cromosomas'
                                              , 'nro_variables'
                                              , 'segundos'])
    
    iteracion = 0
    
    contador_crom = 0

    # Tiempo inicio
        
    tiempo_inicio = time.time()
    
    while (len(df_param) > tamano_campeones) or (contador_crom <= 20):
        
        # Marcacion de padres

        df_param['clase'] = 'P'
        df_param['fitness'] = 10 ** 10

        # Nueva Generación por cruce

        df_param = df_param.sample(frac=1).reset_index(drop=True)
        df_param['fila'] = df_param.index + 1
        df_param['grupo_cruce'] = (df_param['fila'].apply(lambda x: x if x % 2 == 0 else x + 1) / 2) - 1

        df_param['seleccionado'] = 0
        df_param['con_mutacion'] = 0
        
        fila_i = len(df_param)

        for j in list(df_param['grupo_cruce'].unique()):

            df_param_2 = df_param[df_param['grupo_cruce']==j][parametros]

            cromosoma_0 = np.array(df_param_2.iloc[0,:])
            cromosoma_1 = np.array(df_param_2.iloc[1,:])

            descend_0, descend_1 = cruce(cromosoma_0, cromosoma_1, tipo_cruce)

            descend_0 = list(descend_0) + ['C'] + [0] + [0] + [j] + [0] + [0]
            descend_1 = list(descend_1) + ['C'] + [0] + [0] + [j] + [0] + [0]

            fila_i = fila_i + 1
            df_param.loc[fila_i] = list(descend_0)
            fila_i = fila_i + 1
            df_param.loc[fila_i] = list(descend_1)

            fila_i = fila_i + 1

        # Generacion de fitness

        for i in range(len(df_param)):

            param_selec = df_param[parametros].iloc[i,:]
            param_selec = list(param_selec[param_selec==1].index) + [var_objetivo]

            df_param['fitness'].iloc[i] = aic_criterion(df[param_selec], var_objetivo)

        df_param = df_param.reset_index(drop = True)

        # Seleccionar mejores e incluir elitismo

        df_param = seleccion(df_param
                                  , factor_seleccion
                                  , tipo_seleccion
                                  , tamano_torneo
                                  , tamano_elitismo)

        df_param = df_param[df_param['seleccionado']==1].reset_index(drop = True)

        df_param['clase'] = 'P'

        
        # Mutación

        df_param = df_param.sample(frac=1).reset_index(drop=True)
        df_param['fila'] = df_param.index + 1
        df_param['con_mutacion'] = df_param['fila'].apply(lambda x: 1 if x < (len(df_param) * factor_mutacion) else 0)

        df_param_m = df_param[df_param['con_mutacion']==1].reset_index(drop = True).copy()

        df_param = df_param[df_param['con_mutacion']==0].reset_index(drop = True).copy()

        
        for k in range(len(df_param_m)):

            cromosoma_antiguo = np.array(df_param_m.drop(columns=['clase'
                                                                       ,'fitness'
                                                                       ,'fila'
                                                                       ,'grupo_cruce'
                                                                       ,'seleccionado'
                                                                       ,'con_mutacion']).iloc[k])

            cromosoma_nuevo = mutacion(cromosoma_antiguo
                                       ,df_param.drop(columns=['clase'
                                                                    ,'fitness'
                                                                    ,'fila'
                                                                    ,'grupo_cruce'
                                                                    ,'seleccionado'
                                                                    ,'con_mutacion'])
                                       ,tipo_dato)

            df_param_m.iloc[k] = list(cromosoma_nuevo) + list(df_param_m[['clase'
                                                                                    ,'fitness'
                                                                                    ,'fila'
                                                                                    ,'grupo_cruce'
                                                                                    ,'seleccionado'
                                                                                    ,'con_mutacion']].iloc[k])

        df_param = df_param.append(df_param_m).reset_index(drop = True)

        
        if len(df_param) % 2 != 0:
            df_param = df_param.sort_values(by = 'fitness').reset_index(drop = True)[0:len(df_param)-1]
        
        # Medición resultados
        
        media_fitness = np.mean(df_param['fitness'])
        nro_cromosomas = len(df_param)
        nro_var = np.mean(df_param.drop(columns=['clase'
                                                      ,'fitness'
                                                      ,'fila'
                                                      ,'grupo_cruce'
                                                      ,'seleccionado']).sum(axis=1))
        iteracion = iteracion + 1
        
        try:
            ult_nro_cromosomas = df_resultados[len(df_resultados)-1:]['nro_cromosomas'].reset_index(drop = True)[0]
        except:
            ult_nro_cromosomas = 0

        print('iteracion: '
              , iteracion
              , ' | fitness: '
              , media_fitness
              , ' | cromosomas: '
              , nro_cromosomas
              , ' | variables: '
              , nro_var
              , ' | segundos: '
              , (time.time() - tiempo_inicio)
             )

        df_resultados.loc[len(df_resultados)] = [iteracion
                                                 , media_fitness
                                                 , nro_cromosomas
                                                 , nro_var
                                                 , time.time() - tiempo_inicio]
        
        if nro_cromosomas == ult_nro_cromosomas:
            contador_crom = contador_crom + 1

    return(df_param, df_resultados)

In [181]:
# Generar función iterable

def ga_optimizacion2(df
                    , var_objetivo
                    , df_parametros
                    , tamano_campeones
                    , factor_mutacion
                    , tipo_dato
                    , factor_seleccion
                    , tipo_seleccion
                    , tamano_torneo
                    , tamano_elitismo
                    , tipo_cruce):
    # Parametros
    
    df_param = df_parametros.copy()
    parametros = list(df_param.columns)  
    
    df_resultados = pd.DataFrame([], columns=['iteracion'
                                              , 'media_fitness'
                                              , 'nro_cromosomas'
                                              , 'nro_variables'
                                              , 'segundos'])
    
    iteracion = 0
    
    contador_crom = 0

    # Tiempo inicio
        
    tiempo_inicio = time.time()
    
    while (len(df_param) > tamano_campeones) or (contador_crom <= 20):
        
        # Marcacion de padres

        df_param['clase'] = 'P'
        df_param['fitness'] = 10 ** 10

        # Nueva Generación por cruce

        df_param = df_param.sample(frac=1).reset_index(drop=True)
        df_param['fila'] = df_param.index + 1
        df_param['grupo_cruce'] = (df_param['fila'].apply(lambda x: x if x % 2 == 0 else x + 1) / 2) - 1

        df_param['seleccionado'] = 0
        df_param['con_mutacion'] = 0
        
        fila_i = len(df_param)

        for j in list(df_param['grupo_cruce'].unique()):

            df_param_2 = df_param[df_param['grupo_cruce']==j][parametros]

            cromosoma_0 = np.array(df_param_2.iloc[0,:])
            cromosoma_1 = np.array(df_param_2.iloc[1,:])

            descend_0, descend_1 = cruce(cromosoma_0, cromosoma_1, tipo_cruce)

            descend_0 = list(descend_0) + ['C'] + [0] + [0] + [j] + [0] + [0]
            descend_1 = list(descend_1) + ['C'] + [0] + [0] + [j] + [0] + [0]

            fila_i = fila_i + 1
            df_param.loc[fila_i] = list(descend_0)
            fila_i = fila_i + 1
            df_param.loc[fila_i] = list(descend_1)

            fila_i = fila_i + 1

        # Generacion de fitness
        
        for i in range(len(df_param)):

            param_selec = df_param[parametros].iloc[i,:]

            df_param['fitness'].iloc[i] = residuales_f(df, param_selec, var_objetivo)

        df_param = df_param.reset_index(drop = True)
        
        # Seleccionar mejores e incluir elitismo

        df_param = seleccion(df_param
                                  , factor_seleccion
                                  , tipo_seleccion
                                  , tamano_torneo
                                  , tamano_elitismo)

        df_param = df_param[df_param['seleccionado']==1].reset_index(drop = True)

        df_param['clase'] = 'P'

        # Mutación

        df_param = df_param.sample(frac=1).reset_index(drop=True)
        df_param['fila'] = df_param.index + 1
        df_param['con_mutacion'] = df_param['fila'].apply(lambda x: 1 if x < (len(df_param) * factor_mutacion) else 0)

        df_param_m = df_param[df_param['con_mutacion']==1].reset_index(drop = True).copy()

        df_param = df_param[df_param['con_mutacion']==0].reset_index(drop = True).copy()

        
        for k in range(len(df_param_m)):

            cromosoma_antiguo = np.array(df_param_m.drop(columns=['clase'
                                                                       ,'fitness'
                                                                       ,'fila'
                                                                       ,'grupo_cruce'
                                                                       ,'seleccionado'
                                                                       ,'con_mutacion']).iloc[k])

            cromosoma_nuevo = mutacion(cromosoma_antiguo
                                       ,df_param.drop(columns=['clase'
                                                                    ,'fitness'
                                                                    ,'fila'
                                                                    ,'grupo_cruce'
                                                                    ,'seleccionado'
                                                                    ,'con_mutacion'])
                                       ,tipo_dato)

            df_param_m.iloc[k] = list(cromosoma_nuevo) + list(df_param_m[['clase'
                                                                                    ,'fitness'
                                                                                    ,'fila'
                                                                                    ,'grupo_cruce'
                                                                                    ,'seleccionado'
                                                                                    ,'con_mutacion']].iloc[k])

        df_param = df_param.append(df_param_m).reset_index(drop = True)

        
        if len(df_param) % 2 != 0:
            df_param = df_param.sort_values(by = 'fitness').reset_index(drop = True)[0:len(df_param)-1]
        
        # Medición resultados
        
        media_fitness = np.mean(df_param['fitness'])
        nro_cromosomas = len(df_param)
        nro_var = np.mean(df_param.drop(columns=['clase'
                                                      ,'fitness'
                                                      ,'fila'
                                                      ,'grupo_cruce'
                                                      ,'seleccionado']).sum(axis=1))
        iteracion = iteracion + 1
        
        try:
            ult_nro_cromosomas = df_resultados[len(df_resultados)-1:]['nro_cromosomas'].reset_index(drop = True)[0]
        except:
            ult_nro_cromosomas = 0

        print('iteracion: '
              , iteracion
              , ' | fitness: '
              , media_fitness
              , ' | cromosomas: '
              , nro_cromosomas
              , ' | variables: '
              , nro_var
              , ' | segundos: '
              , (time.time() - tiempo_inicio)
             )

        df_resultados.loc[len(df_resultados)] = [iteracion
                                                 , media_fitness
                                                 , nro_cromosomas
                                                 , nro_var
                                                 , time.time() - tiempo_inicio]
        
        if nro_cromosomas == ult_nro_cromosomas:
            contador_crom = contador_crom + 1

    return(df_param, df_resultados)

In [127]:
# Generar función iterable

def ga_optimizacion2(df
                    , var_objetivo
                    , df_parametros
                    , tamano_campeones
                    , factor_mutacion
                    , tipo_dato
                    , factor_seleccion
                    , tipo_seleccion
                    , tamano_torneo
                    , tamano_elitismo
                    , tipo_cruce):
    # Parametros
    
    df_param = df_parametros.copy()
    parametros = list(df_param.columns)  
    
    df_resultados = pd.DataFrame([], columns=['iteracion'
                                              , 'media_fitness'
                                              , 'nro_cromosomas'
                                              , 'nro_variables'
                                              , 'segundos'])
    


        # Marcacion de padres

    df_param['clase'] = 'P'
    df_param['fitness'] = 10 ** 10

        # Nueva Generación por cruce

    df_param = df_param.sample(frac=1).reset_index(drop=True)
    df_param['fila'] = df_param.index + 1
    df_param['grupo_cruce'] = (df_param['fila'].apply(lambda x: x if x % 2 == 0 else x + 1) / 2) - 1

    df_param['seleccionado'] = 0
    df_param['con_mutacion'] = 0
        
    fila_i = len(df_param)

    for j in list(df_param['grupo_cruce'].unique()):

        df_param_2 = df_param[df_param['grupo_cruce']==j][parametros]

        cromosoma_0 = np.array(df_param_2.iloc[0,:])
        cromosoma_1 = np.array(df_param_2.iloc[1,:])

        descend_0, descend_1 = cruce(cromosoma_0, cromosoma_1, tipo_cruce)

        descend_0 = list(descend_0) + ['C'] + [0] + [0] + [j] + [0] + [0]
        descend_1 = list(descend_1) + ['C'] + [0] + [0] + [j] + [0] + [0]

        fila_i = fila_i + 1
        df_param.loc[fila_i] = list(descend_0)
        fila_i = fila_i + 1
        df_param.loc[fila_i] = list(descend_1)

        fila_i = fila_i + 1

        # Generacion de fitness
        
    for i in range(len(df_param)):

        param_selec = df_param[parametros].iloc[i,:]

        df_param['fitness'].iloc[i] = residuales_f(df, param_selec, var_objetivo)

    df_param = df_param.reset_index(drop = True)

    return(df_param)

<h3>1.2.5 Generación de dataset con nuevos parámetros</h3>

In [10]:
## Generación de realciones

columnas = list(df.drop(columns=['csMPa']).columns)
for i in columnas:
    columnas_2 = [x for x in columnas if x not in i]
    for j in columnas_2:
        nombre_columna = i + '-' + j
        df[nombre_columna] = df[i] / df[j]

## Quitar varibales con valores no indeterminados

df = df[list(df.describe().replace([np.inf, -np.inf], np.nan).iloc[-1].dropna().index)]

pattern = re.compile(r'^.*age.*$')
df = df.drop(columns = [x for x in df.columns if ((pattern.match(x)) and (x != 'age'))])

## Generación de Exponenciales

#columnas = list(df.drop(columns=['csMPa']).columns)

#for i in columnas:
    #nombre_columna = 'exp_' + i
    #df[nombre_columna] = df[i].apply(lambda x: math.exp(x))

<h3>1.2.6 Generación de dataset con nuevas variables</h3>

In [11]:
np.random.seed(1984)

variables = list(df.drop(columns=['csMPa']).columns)
df_variables = pd.DataFrame([np.random.choice(2, len(variables), p=[0.9, 0.1]) for x in range(500)]
                            ,columns=variables)

<h3>1.2.7 Pruebas Seleccion de Variables</h3>

In [12]:
np.random.seed(1985)

df_prueba_0 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 38
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.95
                              , tipo_seleccion = 'ruleta'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -2958.501561665134  | cromosomas:  476  | variables:  3.5840336134453783  | segundos:  47.32199835777283
iteracion:  2  | fitness:  -2973.378477849116  | cromosomas:  454  | variables:  3.700440528634361  | segundos:  90.57001113891602
iteracion:  3  | fitness:  -2973.521123170317  | cromosomas:  432  | variables:  3.8194444444444446  | segundos:  129.26500129699707
iteracion:  4  | fitness:  -2992.2248566560547  | cromosomas:  412  | variables:  3.970873786407767  | segundos:  166.2229986190796
iteracion:  5  | fitness:  -3001.2622712215416  | cromosomas:  392  | variables:  4.089285714285714  | segundos:  200.46800112724304
iteracion:  6  | fitness:  -3002.781243206107  | cromosomas:  374  | variables:  4.195187165775401  | segundos:  232.2250108718872
iteracion:  7  | fitness:  -3027.250608622308  | cromosomas:  356  | variables:  4.342696629213483  | segundos:  259.40899777412415
iteracion:  8  | fitness:  -3026.538451534205  | cromosomas:  340  | variabl

iteracion:  65  | fitness:  -3317.9125901050415  | cromosomas:  38  | variables:  11.605263157894736  | segundos:  634.3499977588654
iteracion:  66  | fitness:  -3320.703934624538  | cromosomas:  38  | variables:  11.710526315789474  | segundos:  635.5359983444214
iteracion:  67  | fitness:  -3314.1184801583913  | cromosomas:  38  | variables:  11.842105263157896  | segundos:  636.6119978427887
iteracion:  68  | fitness:  -3322.1001581727696  | cromosomas:  38  | variables:  11.868421052631579  | segundos:  637.686998128891
iteracion:  69  | fitness:  -3322.2074435886693  | cromosomas:  38  | variables:  11.789473684210526  | segundos:  638.8249981403351
iteracion:  70  | fitness:  -3324.854919646909  | cromosomas:  38  | variables:  11.973684210526315  | segundos:  639.9729981422424
iteracion:  71  | fitness:  -3329.6388596609713  | cromosomas:  38  | variables:  11.947368421052632  | segundos:  641.2569980621338
iteracion:  72  | fitness:  -3326.7898784395898  | cromosomas:  38  | va

In [13]:
np.random.seed(1985)

df_prueba_1 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 18
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -3003.5409114075164  | cromosomas:  450  | variables:  3.7666666666666666  | segundos:  29.602012157440186
iteracion:  2  | fitness:  -3043.4675096136575  | cromosomas:  406  | variables:  4.078817733990148  | segundos:  54.63301229476929
iteracion:  3  | fitness:  -3065.626466233526  | cromosomas:  368  | variables:  4.372282608695652  | segundos:  75.98901200294495
iteracion:  4  | fitness:  -3084.5966416746  | cromosomas:  332  | variables:  4.641566265060241  | segundos:  94.32899904251099
iteracion:  5  | fitness:  -3101.277998637764  | cromosomas:  300  | variables:  4.923333333333333  | segundos:  110.0280122756958
iteracion:  6  | fitness:  -3117.079567890924  | cromosomas:  270  | variables:  5.2407407407407405  | segundos:  123.55201244354248
iteracion:  7  | fitness:  -3133.059514282645  | cromosomas:  244  | variables:  5.532786885245901  | segundos:  135.09600496292114
iteracion:  8  | fitness:  -3148.2273997106763  | cromosomas:  220  | variable

In [14]:
np.random.seed(1985)

df_prueba_2 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 12
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.85
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -3019.605690853147  | cromosomas:  426  | variables:  3.8427230046948355  | segundos:  28.680002450942993
iteracion:  2  | fitness:  -3064.68826128768  | cromosomas:  364  | variables:  4.208791208791209  | segundos:  51.463000774383545
iteracion:  3  | fitness:  -3092.1986749238795  | cromosomas:  310  | variables:  4.561290322580645  | segundos:  69.16200065612793
iteracion:  4  | fitness:  -3116.5250632168513  | cromosomas:  264  | variables:  4.958333333333333  | segundos:  82.98300266265869
iteracion:  5  | fitness:  -3135.9871915266976  | cromosomas:  226  | variables:  5.314159292035399  | segundos:  93.85700297355652
iteracion:  6  | fitness:  -3149.3181785830598  | cromosomas:  194  | variables:  5.649484536082475  | segundos:  102.63400077819824
iteracion:  7  | fitness:  -3167.67653042134  | cromosomas:  166  | variables:  6.030120481927711  | segundos:  109.99300074577332
iteracion:  8  | fitness:  -3185.6229683821657  | cromosomas:  142  | variab

In [15]:
np.random.seed(1985)

df_prueba_3 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 10
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.80
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -3038.454840067612  | cromosomas:  400  | variables:  3.905  | segundos:  27.392013549804688
iteracion:  2  | fitness:  -3082.320549991032  | cromosomas:  320  | variables:  4.4375  | segundos:  46.883002281188965
iteracion:  3  | fitness:  -3113.803229521046  | cromosomas:  256  | variables:  4.875  | segundos:  60.73201322555542
iteracion:  4  | fitness:  -3135.787871894524  | cromosomas:  206  | variables:  5.296116504854369  | segundos:  70.79900026321411
iteracion:  5  | fitness:  -3153.7444059863565  | cromosomas:  166  | variables:  5.825301204819277  | segundos:  78.46801328659058
iteracion:  6  | fitness:  -3172.3184326462288  | cromosomas:  134  | variables:  6.402985074626866  | segundos:  84.16200017929077
iteracion:  7  | fitness:  -3195.11288994391  | cromosomas:  108  | variables:  6.981481481481482  | segundos:  88.53600025177002
iteracion:  8  | fitness:  -3218.1833609992173  | cromosomas:  88  | variables:  7.465909090909091  | segundos:  91

In [28]:
print(np.mean(df_prueba_0[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_2[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_3[0].sort_values('fitness')[0:10]['fitness']))

-3393.6166361251626 -3400.703712931748 -3385.1356522740416 -3391.3815279596015


In [30]:
np.random.seed(1985)

df_prueba_1_0 = ga_optimizacion(df = df
                                , var_objetivo = 'csMPa'
                                , df_parametros = df_variables
                                , tamano_campeones = 18
                                , factor_mutacion = 0.1
                                , tipo_dato = 'binario'
                                , factor_seleccion = 0.90
                                , tipo_seleccion = 'torneo'
                                , tamano_torneo = 10
                                , tamano_elitismo = 1
                                , tipo_cruce = 'punto unico'
                               )

iteracion:  1  | fitness:  -3001.1126498078656  | cromosomas:  452  | variables:  3.747787610619469  | segundos:  30.019010543823242
iteracion:  2  | fitness:  -3044.8209263385993  | cromosomas:  408  | variables:  4.080882352941177  | segundos:  55.76899743080139
iteracion:  3  | fitness:  -3072.601034871095  | cromosomas:  368  | variables:  4.334239130434782  | segundos:  77.50299763679504
iteracion:  4  | fitness:  -3093.2371237708235  | cromosomas:  332  | variables:  4.614457831325301  | segundos:  96.22300004959106
iteracion:  5  | fitness:  -3103.9532785086126  | cromosomas:  300  | variables:  4.923333333333333  | segundos:  112.07901048660278
iteracion:  6  | fitness:  -3119.866709559915  | cromosomas:  270  | variables:  5.207407407407407  | segundos:  125.65599775314331
iteracion:  7  | fitness:  -3134.618002815417  | cromosomas:  244  | variables:  5.524590163934426  | segundos:  137.24599862098694
iteracion:  8  | fitness:  -3144.903583331269  | cromosomas:  220  | variab

In [31]:
np.random.seed(1985)

df_prueba_1_1 = ga_optimizacion(df = df
                                , var_objetivo = 'csMPa'
                                , df_parametros = df_variables
                                , tamano_campeones = 18
                                , factor_mutacion = 0.1
                                , tipo_dato = 'binario'
                                , factor_seleccion = 0.90
                                , tipo_seleccion = 'torneo'
                                , tamano_torneo = 15
                                , tamano_elitismo = 1
                                , tipo_cruce = 'punto unico'
                               )

iteracion:  1  | fitness:  -3000.8524206934967  | cromosomas:  450  | variables:  3.7755555555555556  | segundos:  30.153011560440063
iteracion:  2  | fitness:  -3034.877547008132  | cromosomas:  406  | variables:  4.041871921182266  | segundos:  55.53200030326843
iteracion:  3  | fitness:  -3070.991430503638  | cromosomas:  366  | variables:  4.325136612021858  | segundos:  77.04399967193604
iteracion:  4  | fitness:  -3089.0749252817045  | cromosomas:  330  | variables:  4.615151515151515  | segundos:  95.44201111793518
iteracion:  5  | fitness:  -3106.1662777482097  | cromosomas:  298  | variables:  4.946308724832215  | segundos:  111.09001111984253
iteracion:  6  | fitness:  -3118.4972770759978  | cromosomas:  270  | variables:  5.218518518518518  | segundos:  124.63700079917908
iteracion:  7  | fitness:  -3129.6184943217377  | cromosomas:  244  | variables:  5.508196721311475  | segundos:  136.2770116329193
iteracion:  8  | fitness:  -3145.021422785041  | cromosomas:  220  | varia

In [32]:
print(np.mean(df_prueba_1[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_0[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_1[0].sort_values('fitness')[0:10]['fitness']))

-3400.703712931748 -3399.748209921735 -3394.9454352460575


In [33]:
np.random.seed(1985)

df_prueba_1_0 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 18
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'ruleta'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -2957.063151300193  | cromosomas:  450  | variables:  3.5733333333333333  | segundos:  39.64601278305054
iteracion:  2  | fitness:  -2976.915342281983  | cromosomas:  406  | variables:  3.6748768472906406  | segundos:  73.71501278877258
iteracion:  3  | fitness:  -2984.0288751129597  | cromosomas:  366  | variables:  3.8224043715846996  | segundos:  100.71401262283325
iteracion:  4  | fitness:  -2993.900731204803  | cromosomas:  330  | variables:  3.9757575757575756  | segundos:  122.35301280021667
iteracion:  5  | fitness:  -2998.062597428885  | cromosomas:  298  | variables:  4.114093959731544  | segundos:  141.27701258659363
iteracion:  6  | fitness:  -3017.6693147477254  | cromosomas:  270  | variables:  4.2407407407407405  | segundos:  157.5600128173828
iteracion:  7  | fitness:  -3023.0998408732835  | cromosomas:  244  | variables:  4.385245901639344  | segundos:  171.00601267814636
iteracion:  8  | fitness:  -3018.6985351999606  | cromosomas:  220  | v

In [34]:
print(np.mean(df_prueba_1[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_0[0].sort_values('fitness')[0:10]['fitness']))

-3400.703712931748 -3393.7261519541803


In [35]:
np.random.seed(1985)

df_prueba_1_0 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 28
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 2
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -2996.5741863503267  | cromosomas:  452  | variables:  3.7809734513274336  | segundos:  29.638999700546265
iteracion:  2  | fitness:  -3035.56333988424  | cromosomas:  408  | variables:  4.1004901960784315  | segundos:  55.97400140762329
iteracion:  3  | fitness:  -3068.8014249380194  | cromosomas:  370  | variables:  4.386486486486486  | segundos:  78.3570008277893
iteracion:  4  | fitness:  -3088.692123688699  | cromosomas:  334  | variables:  4.700598802395209  | segundos:  97.22999835014343
iteracion:  5  | fitness:  -3107.566974040019  | cromosomas:  302  | variables:  5.072847682119205  | segundos:  113.7579996585846
iteracion:  6  | fitness:  -3124.526934516854  | cromosomas:  274  | variables:  5.416058394160584  | segundos:  127.548011302948
iteracion:  7  | fitness:  -3141.3989417684998  | cromosomas:  248  | variables:  5.782258064516129  | segundos:  139.4059989452362
iteracion:  8  | fitness:  -3157.373878867906  | cromosomas:  226  | variables: 

In [36]:
np.random.seed(1985)

df_prueba_1_1 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 58
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 5
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -3006.0528979794676  | cromosomas:  454  | variables:  3.801762114537445  | segundos:  29.82201337814331
iteracion:  2  | fitness:  -3039.345068745268  | cromosomas:  414  | variables:  4.159420289855072  | segundos:  55.83601355552673
iteracion:  3  | fitness:  -3075.9467419424427  | cromosomas:  378  | variables:  4.478835978835979  | segundos:  77.91801619529724
iteracion:  4  | fitness:  -3096.1774613822727  | cromosomas:  346  | variables:  4.835260115606936  | segundos:  97.11701345443726
iteracion:  5  | fitness:  -3117.9464043715957  | cromosomas:  316  | variables:  5.227848101265823  | segundos:  113.92201352119446
iteracion:  6  | fitness:  -3130.5989880556153  | cromosomas:  290  | variables:  5.617241379310345  | segundos:  128.7370002269745
iteracion:  7  | fitness:  -3152.2937856948647  | cromosomas:  266  | variables:  5.988721804511278  | segundos:  141.70189571380615
iteracion:  8  | fitness:  -3170.991681188774  | cromosomas:  244  | variab

In [37]:
np.random.seed(1985)

df_prueba_1_2 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 108
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 10
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  -3010.0349362191423  | cromosomas:  460  | variables:  3.8086956521739133  | segundos:  29.805001735687256
iteracion:  2  | fitness:  -3055.950019041263  | cromosomas:  424  | variables:  4.1674528301886795  | segundos:  57.786999464035034
iteracion:  3  | fitness:  -3090.5102832843086  | cromosomas:  392  | variables:  4.538265306122449  | segundos:  81.15701222419739
iteracion:  4  | fitness:  -3114.852138747859  | cromosomas:  362  | variables:  4.933701657458563  | segundos:  101.583012342453
iteracion:  5  | fitness:  -3137.2554948589345  | cromosomas:  336  | variables:  5.303571428571429  | segundos:  119.62501239776611
iteracion:  6  | fitness:  -3164.270186315573  | cromosomas:  312  | variables:  5.737179487179487  | segundos:  136.4520013332367
iteracion:  7  | fitness:  -3187.1632618376702  | cromosomas:  290  | variables:  6.258620689655173  | segundos:  151.18700003623962
iteracion:  8  | fitness:  -3208.8223534733943  | cromosomas:  270  | vari

In [38]:
print(np.mean(df_prueba_1[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_0[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_1[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_2[0].sort_values('fitness')[0:10]['fitness']))

-3400.703712931748 -3400.8359273076 -3402.8094361691883 -3405.2308085490868


In [39]:
np.random.seed(1985)

df_prueba_1_2_0 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 108
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 10
                              , tipo_cruce = 'dos puntos'
                             )

iteracion:  1  | fitness:  -3014.6573807772807  | cromosomas:  460  | variables:  3.7978260869565217  | segundos:  29.74999713897705
iteracion:  2  | fitness:  -3054.292384661168  | cromosomas:  424  | variables:  4.158018867924528  | segundos:  55.781010150909424
iteracion:  3  | fitness:  -3089.3206326147156  | cromosomas:  392  | variables:  4.497448979591836  | segundos:  78.91999769210815
iteracion:  4  | fitness:  -3114.1571642300423  | cromosomas:  362  | variables:  4.906077348066298  | segundos:  99.1880099773407
iteracion:  5  | fitness:  -3131.586159320511  | cromosomas:  336  | variables:  5.357142857142857  | segundos:  117.13801002502441
iteracion:  6  | fitness:  -3150.2841753626117  | cromosomas:  312  | variables:  5.8173076923076925  | segundos:  133.18399691581726
iteracion:  7  | fitness:  -3173.2444419618073  | cromosomas:  290  | variables:  6.272413793103448  | segundos:  147.50699949264526
iteracion:  8  | fitness:  -3191.0532400375187  | cromosomas:  270  | var

In [40]:
np.random.seed(1985)

df_prueba_1_2_1 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 108
                              , factor_mutacion = 0.1
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 10
                              , tipo_cruce = 'uniforme'
                             )

iteracion:  1  | fitness:  -3005.860251966027  | cromosomas:  460  | variables:  3.7934782608695654  | segundos:  30.04799747467041
iteracion:  2  | fitness:  -3053.52105502515  | cromosomas:  424  | variables:  4.169811320754717  | segundos:  56.175009965896606
iteracion:  3  | fitness:  -3079.5986110872213  | cromosomas:  392  | variables:  4.5229591836734695  | segundos:  79.18999695777893
iteracion:  4  | fitness:  -3106.4839734998222  | cromosomas:  362  | variables:  4.933701657458563  | segundos:  99.596999168396
iteracion:  5  | fitness:  -3127.2506076235213  | cromosomas:  336  | variables:  5.366071428571429  | segundos:  117.67999792098999
iteracion:  6  | fitness:  -3153.129328248319  | cromosomas:  312  | variables:  5.833333333333333  | segundos:  133.78499674797058
iteracion:  7  | fitness:  -3172.8876638285865  | cromosomas:  290  | variables:  6.3068965517241375  | segundos:  149.22799706459045
iteracion:  8  | fitness:  -3195.4170374565156  | cromosomas:  270  | varia

In [41]:
print(np.mean(df_prueba_1_2[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_2_0[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_2_1[0].sort_values('fitness')[0:10]['fitness']))

-3405.2308085490868 -3406.588711805741 -3404.5397311746005


In [42]:
np.random.seed(1985)

df_prueba_1_2_0 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 108
                              , factor_mutacion = 0.2
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 10
                              , tipo_cruce = 'uniforme'
                             )

iteracion:  1  | fitness:  -3005.860251966027  | cromosomas:  460  | variables:  3.9804347826086954  | segundos:  31.54874086380005
iteracion:  2  | fitness:  -3050.9321761871784  | cromosomas:  424  | variables:  4.4504716981132075  | segundos:  58.393205881118774
iteracion:  3  | fitness:  -3088.5779833331467  | cromosomas:  392  | variables:  4.857142857142857  | segundos:  81.91081023216248
iteracion:  4  | fitness:  -3119.912870346772  | cromosomas:  362  | variables:  5.314917127071824  | segundos:  102.74600648880005
iteracion:  5  | fitness:  -3136.685104741903  | cromosomas:  336  | variables:  5.800595238095238  | segundos:  121.33086729049683
iteracion:  6  | fitness:  -3165.984427081104  | cromosomas:  312  | variables:  6.288461538461538  | segundos:  137.91412806510925
iteracion:  7  | fitness:  -3189.1626379462045  | cromosomas:  290  | variables:  6.810344827586207  | segundos:  152.6009247303009
iteracion:  8  | fitness:  -3208.2761686652766  | cromosomas:  270  | vari

In [43]:
np.random.seed(1985)

df_prueba_1_2_1 = ga_optimizacion(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_variables
                              , tamano_campeones = 108
                              , factor_mutacion = 0.3
                              , tipo_dato = 'binario'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 10
                              , tipo_cruce = 'uniforme'
                             )

iteracion:  1  | fitness:  -3005.8602519660276  | cromosomas:  460  | variables:  4.158695652173913  | segundos:  30.764503717422485
iteracion:  2  | fitness:  -3058.2790475872284  | cromosomas:  424  | variables:  4.712264150943396  | segundos:  57.98667597770691
iteracion:  3  | fitness:  -3100.189871578004  | cromosomas:  392  | variables:  5.239795918367347  | segundos:  81.85425591468811
iteracion:  4  | fitness:  -3130.1302021102642  | cromosomas:  362  | variables:  5.792817679558011  | segundos:  103.0028760433197
iteracion:  5  | fitness:  -3153.912583802548  | cromosomas:  336  | variables:  6.279761904761905  | segundos:  121.75412583351135
iteracion:  6  | fitness:  -3172.523098549506  | cromosomas:  312  | variables:  6.766025641025641  | segundos:  138.53670692443848
iteracion:  7  | fitness:  -3196.9516609978696  | cromosomas:  290  | variables:  7.317241379310345  | segundos:  153.52538633346558
iteracion:  8  | fitness:  -3217.0504243193054  | cromosomas:  270  | varia

In [44]:
print(np.mean(df_prueba_1_2[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_2_0[0].sort_values('fitness')[0:10]['fitness'])
      ,np.mean(df_prueba_1_2_1[0].sort_values('fitness')[0:10]['fitness']))

-3405.2308085490868 -3404.7928411087896 -3406.123335807096


In [58]:
mejores_var = df_prueba_1_2[0].sort_values('fitness').drop(columns = ['grupo_cruce'
                                                                      , 'seleccionado'
                                                                      , 'fila']).reset_index(drop = True)[0:100].describe().iloc[1,:]
mejores_var = list(mejores_var[mejores_var>=0.99].index)

mejores_var

['flyash',
 'water',
 'coarseaggregate',
 'age',
 'cement-fineaggregate',
 'slag-water',
 'slag-fineaggregate',
 'flyash-water',
 'flyash-fineaggregate',
 'water-cement',
 'water-coarseaggregate',
 'superplasticizer-cement',
 'coarseaggregate-cement']

<h2>1.3 Estimación de parámetros de por GA con minimización de RMSE</h2>

<h3>1.3.1 Generación de dataset con posibles parámetros</h3>

In [186]:
np.random.seed(2021)

variables_modelo = mejores_var+['csMPa']

df = df[variables_modelo]

df_pendientes = pd.DataFrame([np.random.choice(5
                                               , len(mejores_var)
                                               , p=[0.2, 0.2, 0.2, 0.2, 0.2]) for x in range(500)]
                            ,columns=mejores_var)

df_pendientes = df_pendientes / 4

In [188]:
np.random.seed(2022)

df_prueba_param_1_0 = ga_optimizacion2(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_pendientes
                              , tamano_campeones = 38
                              , factor_mutacion = 0.1
                              , tipo_dato = 'flotante'
                              , factor_seleccion = 0.95
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  14.788073385750614  | cromosomas:  476  | variables:  6.559605343455681  | segundos:  33.06233859062195
iteracion:  2  | fitness:  14.129309129613006  | cromosomas:  454  | variables:  6.477814709965744  | segundos:  64.0888831615448
iteracion:  3  | fitness:  13.394835109058862  | cromosomas:  432  | variables:  6.3888418806059875  | segundos:  92.55240988731384
iteracion:  4  | fitness:  12.677445354418344  | cromosomas:  412  | variables:  6.271928674008373  | segundos:  118.96224284172058
iteracion:  5  | fitness:  11.973420677682126  | cromosomas:  392  | variables:  6.169975782473418  | segundos:  143.53064393997192
iteracion:  6  | fitness:  11.402113852924364  | cromosomas:  374  | variables:  6.080964365278341  | segundos:  166.19498372077942
iteracion:  7  | fitness:  10.793694078708418  | cromosomas:  356  | variables:  5.972569197926138  | segundos:  187.3827202320099
iteracion:  8  | fitness:  10.230674594459465  | cromosomas:  340  | variables: 

iteracion:  64  | fitness:  0.11322081301954051  | cromosomas:  38  | variables:  2.060441085213556  | segundos:  505.90082120895386
iteracion:  65  | fitness:  0.11114912879773121  | cromosomas:  38  | variables:  2.0179719625141352  | segundos:  507.1081552505493
iteracion:  66  | fitness:  0.09017282160153386  | cromosomas:  38  | variables:  1.990027824155339  | segundos:  508.3152425289154
iteracion:  67  | fitness:  0.07438328701179202  | cromosomas:  38  | variables:  1.9380728972914099  | segundos:  509.5182423591614
iteracion:  68  | fitness:  0.08452845967661987  | cromosomas:  38  | variables:  1.9382158291746618  | segundos:  510.72825503349304
iteracion:  69  | fitness:  0.08056071963285098  | cromosomas:  38  | variables:  1.9789684030372827  | segundos:  511.93525528907776
iteracion:  70  | fitness:  0.07163108178813782  | cromosomas:  38  | variables:  1.9078460758134854  | segundos:  513.1276016235352
iteracion:  71  | fitness:  0.058264053690708625  | cromosomas:  38 

In [None]:
np.random.seed(2022)

df_prueba_param_1_0 = ga_optimizacion2(df = df
                              , var_objetivo = 'csMPa'
                              , df_parametros = df_pendientes
                              , tamano_campeones = 38
                              , factor_mutacion = 0.1
                              , tipo_dato = 'flotante'
                              , factor_seleccion = 0.90
                              , tipo_seleccion = 'torneo'
                              , tamano_torneo = 5
                              , tamano_elitismo = 1
                              , tipo_cruce = 'punto unico'
                             )

iteracion:  1  | fitness:  14.026773317104036  | cromosomas:  450  | variables:  6.469260185593094  | segundos:  32.33288073539734
iteracion:  2  | fitness:  12.80472516371137  | cromosomas:  406  | variables:  6.325056210156074  | segundos:  60.0319664478302
iteracion:  3  | fitness:  11.680667924060762  | cromosomas:  366  | variables:  6.171102242241976  | segundos:  83.41937923431396
