In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time, os
import random
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorboard.plugins.hparams import api as hp

from keras import activations
from keras.utils import to_categorical
from keras.losses import CategoricalCrossentropy
from keras.models import Model

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as split

from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli
from bitstring import BitArray

from elitism import eaSimpleWithElitism, main

Using TensorFlow backend.


Succesfully imported


In [2]:
np.random.seed(0)

tf.config.optimizer.set_jit(True)
scaler = StandardScaler()

### Conjunto de datos: SDSS DR17

In [3]:
data = pd.read_csv('./SDSS/star_classification.csv')
cols = ['alpha','delta','u','g','r','i','z','redshift','class']
data = data[cols]
data.head()

Unnamed: 0,alpha,delta,u,g,r,i,z,redshift,class
0,135.689107,32.494632,23.87882,22.2753,20.39501,19.16573,18.79371,0.634794,GALAXY
1,144.826101,31.274185,24.77759,22.83188,22.58444,21.16812,21.61427,0.779136,GALAXY
2,142.18879,35.582444,25.26307,22.66389,20.60976,19.34857,18.94827,0.644195,GALAXY
3,338.741038,-0.402828,22.13682,23.77656,21.61162,20.50454,19.2501,0.932346,GALAXY
4,345.282593,21.183866,19.43718,17.58028,16.49747,15.97711,15.54461,0.116123,GALAXY


Se cambian las clases a vectores con números enteros

$$ \text{GALAXY}: \begin{pmatrix}1\\0\\0\end{pmatrix},\quad \text{STAR}: \begin{pmatrix}0\\1\\0\end{pmatrix},\quad \text{QSO}:\begin{pmatrix}0\\0\\1\end{pmatrix} $$

In [4]:
data["class"]=[0 if i == "GALAXY" else 1 if i == "STAR" else 2 for i in data["class"]]
print(data.head())
data = data.to_numpy()

        alpha      delta         u         g         r         i         z  \
0  135.689107  32.494632  23.87882  22.27530  20.39501  19.16573  18.79371   
1  144.826101  31.274185  24.77759  22.83188  22.58444  21.16812  21.61427   
2  142.188790  35.582444  25.26307  22.66389  20.60976  19.34857  18.94827   
3  338.741038  -0.402828  22.13682  23.77656  21.61162  20.50454  19.25010   
4  345.282593  21.183866  19.43718  17.58028  16.49747  15.97711  15.54461   

   redshift  class  
0  0.634794      0  
1  0.779136      0  
2  0.644195      0  
3  0.932346      0  
4  0.116123      0  


### Implementación

Se establecen las funciones necesarias para aplicar el algorítmo genético a partir de lo siguiente

- 1) Decodificar el gen del individuo para obtener el núm. de capas ocultas, núm. de neuronas y la tasa de aprendizaje.
    
    
- 2) Preparar el conjunto de datos para dividirlos en conjunto de entrenamiento y validación.
    
    
- 3) Entrenar la red neuronal, calcular la presición del modelo en el conjunto de validación y regresarlo como fitness score para el algorítmo genético.

In [5]:
# Activation function layers

f1 = lambda x: Dense(x, activation='relu')      #ReLU
f2 = lambda x: keras.layers.LeakyReLU(0.3)      #LReLU
f3 = lambda x: Dense(x, activation='elu')       #ELU
f4 = lambda x: Dense(x, kernel_initializer='lecun_normal', activation='selu')   #SELU

f_names = ["ReLU", "LReLU", "ELU", "SELU"]

In [6]:
SC_DEEP       = np.array([2,4,8,16,24,32,48,64])        # Number of deep layers (8)
SC_NUM_UNITS  = np.array([8,16,24,32,40,48,56,64])   # Number of fully conected neurons (8)
SC_LEARNING   = np.array([1e-4,1e-3,1e-2,1e-1])         # Learning rates (4)
SC_ACTIVATION = [f1, f2, f3, f4]                        # Activation function layers (4)
# SC_BATCHSIZE = np.array([16,32])

# callbacks = [keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', mode='max',
#                                min_delta=0,
#                                patience=6,
#                                restore_best_weights=True)]

callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', mode='auto',
                               min_delta=0.01, 
                               patience=10,
                               verbose=1,
                               baseline=0.98, restore_best_weights=False)]
    
batch_size = 128;      epochs = 50

In [7]:
# Divide data into X and Y and implement hot_ones in Y
def prepare_dataset(data):
    X, Y = np.empty((0)), np.empty((0))
    X = data[:,0:8]
    Y = data[:,8]
    Y = to_categorical(Y, num_classes=3)
    return X, Y

In [8]:
# Split dataset into train and validation (70/30)
X,Y = prepare_dataset(data)
X_train, X_test, Y_train, Y_test = split(X, Y, test_size = 0.3, random_state = 0)
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

### Architectura del modelo

In [37]:
sss= [2,2,2,2]
sss.pop()
sss

[2, 2, 2]

In [38]:

def train_evaluate(ga_individual_solution):   
    t = time.time(); t_total = 0
    
    # Decode GA solution to integer for window_size and num_units
    deep_layers_bits   = BitArray(ga_individual_solution[0:3])   # (8)
    num_units_bits     = BitArray(ga_individual_solution[3:6])   # (8)
    learning_rate_bits = BitArray(ga_individual_solution[6:8])   # (4)
    activation_f_bits  = BitArray(ga_individual_solution[8:10])  # (4)
    
    deep_layers   = SC_DEEP[deep_layers_bits.uint]
    num_units     = SC_NUM_UNITS[num_units_bits.uint]
    learning_rate = SC_LEARNING[learning_rate_bits.uint]
    activation_f  = SC_ACTIVATION[activation_f_bits.uint]
    
    print('\n--------------- Starting trial:', len(ss), "---------------")
    print('Deep Layers: ',deep_layers,', Number of neurons: ',num_units,", Learning rate: ",learning_rate,', Activation function: ',f_names[activation_f_bits.uint])
#     print("-------------------------------------------------")
    
    # Train model and predict on validation set
    model = keras.Sequential()
    model.add(Input(shape=(int(X_train.shape[1]),)))
    model.add(Dense(num_units, input_shape=(int(X_train.shape[1]),)))
    
    for i in range(deep_layers):        
        model.add(activation_f(num_units))
        if i % SC_NUM_UNITS[1]==0:
            model.add(keras.layers.Dropout(0.3))
    model.add(Dense(3, activation=tf.nn.softmax))
    
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-3)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
    model.fit(X_train, Y_train, epochs=epochs, validation_data=(X_test, Y_test),
              callbacks=callbacks, batch_size=batch_size, shuffle=True, verbose=0)
    
    _, score = model.evaluate(X_test, Y_test)    
    t = time.time()-t; ss = ss[1:]
    print("Accuracy:", score, ", Elapsed time:", t)
    print("-------------------------------------------------\n")

#     datos.append([deep_layers, num_units, learning_rate, f_names[activation_f_bits.uint], score, t])
    
    return score,

A continuación, use la paquetería DEAP para definir las cosas para ejecutar GA. Usaremos una representación binaria para la solución de longitud diez. Se inicializará aleatoriamente utilizando la distribución de Bernoulli. Del mismo modo, se utiliza el crossover ordenado, la mutación aleatoria y la selección de la rueda de la ruleta. Los valores del parámetro GA se inicializan arbitrariamente.

In [39]:
population_size = 40; 
max_generations = 20
gene_length = 10;
k = 5

# Genetic Algorithm constants:
P_CROSSOVER = 0.85  # probability for crossover
P_MUTATION = 0.5   # (try also 0.5) probability for mutating an individual
HALL_OF_FAME_SIZE = 1
CROWDING_FACTOR = 25.0  # crowding factor for crossover and mutation

# datos = []
ss = [i for i in range(1,population_size*max_generations + 1)]

# set the random seed:
toolbox = base.Toolbox()

# As we are trying to minimize the RMSE score, that's why using -1.0. 
# In case, when you want to maximize accuracy for instance, use 1.0
creator.create('FitnessMax', base.Fitness, weights = [1.0])
creator.create('Individual', list , fitness = creator.FitnessMax)

# create the individual operator to fill up an Individual instance:
toolbox.register('binary', bernoulli.rvs, 0.5)
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)

# create the population operator to generate a list of individuals:
toolbox.register('population', tools.initRepeat, list , toolbox.individual)

# genetic operators:
toolbox.register('select', tools.selRoulette)
toolbox.register('mate', tools.cxOrdered)
toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)
toolbox.register('evaluate', train_evaluate)

# Genetic Algorithm flow:
def main():

    # create initial population (generation 0):
    population = toolbox.population(n=population_size)

    # prepare the statistics object:
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("max", np.max)
    stats.register("avg", np.mean)

    # define the hall-of-fame object:
    hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

    # perform the Genetic Algorithm flow with elitism:
    population, logbook = eaSimpleWithElitism(population, toolbox, cxpb=P_CROSSOVER, mutpb=P_MUTATION,
                                              ngen=max_generations, stats=stats, halloffame=hof, verbose=True)

    # print info for best solution found:
    best = hof.items[0]
    print("-- Best Individual = ", best)
    print("-- Best Fitness = ", best.fitness.values[0])

    # extract statistics:
    maxFitnessValues, meanFitnessValues = logbook.select("max", "avg")

    # plot statistics:
    sns.set_style("whitegrid")
    plt.plot(maxFitnessValues, color='red', label="Max")
    plt.plot(meanFitnessValues, color='green', label="Mean")
    plt.xlabel('Generation'); plt.ylabel('max / Average Fitness')
    plt.legend()
    plt.title('Max and Average fitness over Generations')
#     plt.yscale("log")
    plt.show()
    
    best_population = tools.selBest(population,k = k)
    return best_population

if __name__ == "__main__":
    t = time.time()
    best_population = main()
    print("Total elapsed time:", (time.time()-t)/60, "minutes")

UnboundLocalError: local variable 'ss' referenced before assignment

### Guardar datos

In [None]:
Best_deep_layers   = []
Best_num_units     = []
Best_learning_rate = []
Best_activation_f  = []

t = 0

for bi in best_individuals:
    deep_layers_bits   = BitArray(bi[0:3])
    num_units_bits     = BitArray(bi[3:6])
    learning_rate_bits = BitArray(bi[6:8])
    activation_f_bits  = BitArray(bi[8:10])
    t += 1 
    
    Best_deep_layers.append(  SC_DEEP[deep_layers_bits.uint])
    Best_num_units.append(    SC_NUM_UNITS[num_units_bits.uint])
    Best_learning_rate.append(SC_LEARNING[learning_rate_bits.uint])
    Best_activation_f.append( SC_ACTIVATION[activation_f_bits.uint])
    print('k=',t,'\nDeep Layers: ', Best_deep_layers[-1], ', Num of Units: ', Best_num_units[-1], ', Learning rate: ', Best_learning_rate[-1], ", Activation function: ", Best_activation_f[-1])

In [None]:
filename = "historial_sdss.txt"
df = pd.DataFrame(datos, columns = ["Deep size", "Num units", "Learning rate", "Activation function", "Accuracy", "Elapsed time"])

df.sort_values(by=["Accuracy", "Elapsed time"], ascending=[0,0], ignore_index=True, inplace=True)

df.to_csv(filename, header=True, index=False, sep='\t', mode='w') # a=append, w=overwrite

In [None]:
df

In [None]:
np.sum(df[["Tiempo de ejecución"]])/60/60

### Cargar datos

In [None]:
pathname = os.path.join('.\\', filename)
while True:
    try: 
        df 
        break
    except:
        df = pd.read_csv(pathname, delimiter = "\t")
        break

In [None]:
df

### Mejores individuos

In [None]:
# Selecciona los mejores k individuos - (para k=5)
k = 10
best_genes = df.iloc[:k,]

best_deep_size = best_genes.iloc[:,0]
best_num_units = best_genes.iloc[:,1]
best_learning_rate = best_genes.iloc[:,2]

best_genes

In [None]:
best_deep_size

In [None]:
# Train the model using best configuration on complete training set 
#and make predictions on the test set

X,Y = prepare_dataset(data)
X_train, X_test, Y_train, Y_test = split(X, Y, test_size = 0.20, random_state = 0)

models = []
historial = []
y_pred = []

for k in range(len(best_deep_size)):
    print('\n--- Starting trial:', k)
    print('Deep Size: ', best_deep_size[k], ', Num of Units: ', best_num_units[k], ', Learning rate: ', best_learning_rate[k])
    
    models.append(keras.Sequential())
    models[-1].add(Input(shape=(int(X_train.shape[1]),)))
    models[-1].add(Dense(best_num_units[k], input_shape=(int(X_train.shape[1]),)))
#     x = LSTM(num_units, input_shape=(window_size,1))(inputs)

    for i in range(best_deep_size[k]):        
        models[-1].add(Dense(best_num_units[k], activation='relu'))
    models[-1].add(Dense(3, activation=tf.nn.softmax))

    optimizer = keras.optimizers.Adam(learning_rate=best_learning_rate[k], beta_1=0.9, beta_2=0.999, epsilon=1e-3)
    models[-1].compile(
            optimizer=optimizer,
            loss=CategoricalCrossentropy(),
            metrics=["categorical_accuracy"])
            
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', mode='max',
                                       min_delta=0,
                                       patience=50,
                                       restore_best_weights=True)]
#     history = models[-1].fit(X_train, y_train, validation_data=(X_test, y_test), 
#                         epochs=best_epochs, callbacks=callbacks, batch_size=best_batch_size, shuffle=True)
    historial.append(models[-1].fit(X_train, y_train, epochs=20, validation_data=(X_test, Y_test),
                                    batch_size=128, shuffle=True))
    y_pred.append(models[-1].predict(X))
    
    _, score = models[-1].evaluate(X_test, Y_test)  

    print('Test score: ', score)

In [None]:
fig, axs = plt.subplots(1, 5, figsize=(14,6), sharey='row')
fig.suptitle('Best models')
titles = []
for k in range(len(models)):
    titles.append(f"Model loss {k+1} (log)")
    axs[k].plot(historial[k].history['loss'])
    axs[k].plot(historial[k].history['val_loss'])
#     print(titles[-1])
    axs[k].set_title(titles[-1])
    axs[k].set_yscale("log")
    axs[k].legend(['train', 'test'], loc='upper left')
    axs[k].grid()

for ax in axs.flat:
#     ax.set(xlabel='Epoch', ylabel='Loss')
    ax.set(xlabel='Epoch')
axs.flat[0].set(ylabel='Loss')
plt.show()

### Gráficas de comparación

In [None]:
Y_pred1 = np.argmax(models[0].predict(X), axis=1)
Y_pred2 = np.argmax(models[1].predict(X), axis=1)
# Y_pred3 = np.argmax(models[2].predict(X), axis=1)

In [None]:
Y_pred1

In [None]:
# X[:,4].min(),X[:,4].max(), X[:,4].mean(), X[:,4].std()

In [None]:
fig = plt.figure(figsize=(9,8))
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(X[:,2], X[:,3], X[:,4], marker='+', c=np.argmax(Y,axis=1), cmap='brg', alpha=1)
ax.set_xlabel('$u$', fontsize=15); ax.set_xlim([10, 30])
ax.set_ylabel('$g$', fontsize=15); ax.set_ylim([5, 35])
ax.set_zlabel('$r$', fontsize=15); ax.set_zlim([8, 25])
plt.colorbar(p, shrink=0.5, label='Etiqueta objeto'); plt.title(r"Clasificación de objetos estelares (Datos originales)")
plt.show()

In [None]:
fig = plt.figure(figsize=(9,8))
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(X[:,2], X[:,3], X[:,4], marker='+', c=Y_pred1, cmap='brg', alpha=1)
ax.set_xlabel('$u$', fontsize=15); ax.set_xlim([10, 30])
ax.set_ylabel('$g$', fontsize=15); ax.set_ylim([5, 35])
ax.set_zlabel('$r$', fontsize=15); ax.set_zlim([8, 25])
plt.colorbar(p, shrink=0.5, label='Etiqueta objeto'); plt.title(r"Clasificación de objetos estelares (Modelo 1)")
plt.show()

In [None]:
fig = plt.figure(figsize=(9,8))
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(X[:,2], X[:,3], X[:,4], marker='+', c=Y_pred2, cmap='brg', alpha=1)
ax.set_xlabel('$u$', fontsize=15); ax.set_xlim([10, 30])
ax.set_ylabel('$g$', fontsize=15); ax.set_ylim([5, 35])
ax.set_zlabel('$r$', fontsize=15); ax.set_zlim([8, 25])
plt.colorbar(p, shrink=0.5, label='Etiqueta objeto'); plt.title(r"Clasificación de objetos estelares (Modelo 2)")
plt.show()

### Resultado de hiperparámetros

In [None]:
best_genes.mean()

In [None]:
g1= sns.jointplot(data=df, x="Deep size", y="Num units", kind="kde")
g1.ax_joint.scatter(best_genes.iloc[:,0], best_genes.iloc[:,1], color = 'red', label="a")

plt.show()

g2= sns.jointplot(x=df.iloc[:,2], y=df.iloc[:,4], kind='kde')
g2.ax_joint.scatter(best_genes.iloc[:,2], best_genes.iloc[:,4], color = 'red', label="a")

plt.show()

In [None]:
# fig, axs = plt.subplots(1,2, figsize=(15,5))

# fig.suptitle('Última generación de hipermarámetros obtenidos')

# # axs[0].plot(df.iloc[:,0], df.iloc[:,1], alpha=0.5, c='blue', label="Model pred 1")
# axs[0].scatter(df.iloc[:,0], df.iloc[:,1], s=20, alpha=0.3, c='red', label="Model pred 1")
# axs[0].scatter(best_genes.iloc[:,0], best_genes.iloc[:,1], s=80, alpha=1, c='red', label="Mejores individuos")
# axs[0].grid(); axs[0].legend()
# # axs[0].legend(loc='upper right'); 
# # axs[0].set_xlim([2, 18]); axs[0].set_ylim([1, 65])
# axs[0].set(xlabel=r'Deep size', ylabel='Num units')

# # axs[1].plot(df.iloc[:,2], df.iloc[:,3], alpha=0.5, c='blue', label="Model pred 1")
# axs[1].scatter(df.iloc[:,2], df.iloc[:,3], s=20, alpha=0.3, c='red', label="Model pred 1")
# axs[1].scatter(best_genes.iloc[:,2], best_genes.iloc[:,3], s=80, alpha=1, c='red', label="Mejores individuos")
# axs[1].set_yscale("log"); 
# axs[1].grid(); axs[1].legend(loc='upper right')
# # axs[1].set_xlim([1e-4, 31e-4]); axs[1].set_ylim([2**1, 2**4])
# axs[1].set(xlabel=r'Learning rate', ylabel='Batch size')