# Competición CNN 

Autores: David García Lleyda, Álvaro Martínez Parpolowicz, Alexis Gómez Chimeno

Clase: MAIS 5ºA


## 1. Librerías

In [None]:
import numpy as np
import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import scipy
from tensorflow.keras.layers import CategoryEncoding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, BatchNormalization, MaxPooling2D
from sklearn.metrics import confusion_matrix
from tensorflow.keras import metrics,layers
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
import os
import pandas as pd
import cv2 
import json
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras import backend as K
import tensorflow_addons as tfa
from tensorflow.python.client import device_lib 
import optuna
import plotly
from mlflow import MlflowClient
import mlflow
import mlflow.keras
from pprint import pprint
import logging
import matplotlib.pyplot as plt
from sklearn.metrics import multilabel_confusion_matrix
import seaborn as sns

## 2. Declaración de MLflow

Vamos a utilizar Mlflow que es una herramienta del ámbito de MLOps para hacer un seguimiento de las diferentes características de la CNN. De esta manera conseguimos guardar los resultados y compararlos de forma sencilla.

En las siguientes celdas declaramos el experimento y especificamos la URI con la que se debe comunicar para mostrar los resultados.

In [None]:
# Send every MLflow tracking call made through the `mlflow` module to this URI.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [None]:
# Register (or reuse) the MLflow experiment under which the CNN runs are tracked.
client = MlflowClient(tracking_uri="http://127.0.0.1:8080")
experiment_description = (
    "This is the multilabel classification project for VICO. "
    "This experiment contains the produce models for multilabel classification."
)
experiment_tags = {
    "project_name": "multilabel_classification",
    "store_dept": "produce",
    "team": "stores-ml",
    "project_quarter": "Q1-2024",
    "mlflow.note.content": experiment_description,
}
# create_experiment raises when the name is already taken, which happens every
# time this cell is re-run against the same tracking server. Look the
# experiment up first and only create it when it does not exist yet.
experiment_name = "Multilabel_Model"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    # create_experiment returns the id of the new experiment
    produce_multilabel_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    produce_multilabel_experiment = existing_experiment.experiment_id

## 3. Solución del ejercicio



### 3.1 Comprobación de dispositivos disponibles donde entrenar el modelo

En primer lugar, como el objetivo es utilizar aceleración por GPU para agilizar los cálculos de la CNN, vamos a comprobar que tensorflow detecta la GPU.

In [None]:
def get_available_devices():
    """Return the names of all devices listed by ``device_lib.list_local_devices``."""
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names


print(get_available_devices())

En este caso, podemos ver que sí la detecta. En caso de no detectarla, el código seguiría funcionando pero el entrenamiento sería mucho más lento.

### 3.2 Carga de los datos y etiquetas previamente guardadas

En esta sección definiremos las rutas de donde se van a coger las imágenes, calcularemos el tamaño de la imagen más pequeña, cargaremos las etiquetas correspondientes a cada una de las imágenes desde un formato .json y guardaremos los nombres de los archivos así como las etiquetas en un dataframe.

Este último paso es necesario por varios motivos:

1. Los generadores de tensorflow son necesarios para poder ir cargando por lotes las imágenes a la GPU y no sobrecargar su VRAM.
2. Los generadores permiten definir ciertas características como son el tamaño de los batches, si los datos deben mezclarse de forma aleatoria, la semilla a utilizar y un tamaño objetivo para cargar las imágenes.

Ambos motivos hacen que los generadores sean imprescindibles para el correcto funcionamiento y entrenamiento del modelo.

Además, también definimos en una lista las 5 posibles etiquetas que asignamos en el entrenamiento multietiqueta a cada una de las imágenes.

In [None]:
# Root folders of each split. Training images are read from the
# 'Sports_Final' tree; validation and test images from 'Sports'.
relative_path_train, relative_path_valid, relative_path_test = (
    './Sports_Final/train/',
    './Sports/valid/',
    './Sports/test/',
)

In [None]:
def calculate_smallest_image_in_folder(path):
    """Return the shape of the image with the shortest diagonal in a folder.

    Args:
        path: Directory whose entries are read with ``cv2.imread``.

    Returns:
        A ``(shape[0], shape[1])`` tuple taken from the smallest image. For
        an array returned by ``cv2.imread`` this is (rows, columns), i.e.
        (height, width).

    Raises:
        ValueError: If no entry in ``path`` could be read as an image.
    """
    smallest_shape = None
    smallest_diagonal = float('inf')
    for image_name in os.listdir(path):
        current_image = cv2.imread(os.path.join(path, image_name))
        # cv2.imread signals an unreadable entry (sub-folder, non-image file,
        # corrupt image) by returning None instead of raising; skip those.
        if current_image is None:
            continue
        rows, cols = current_image.shape[:2]
        diagonal_length = np.sqrt(rows**2 + cols**2)
        if diagonal_length < smallest_diagonal:
            smallest_diagonal = diagonal_length
            # Only the shape is needed, so the pixel data is not kept alive.
            smallest_shape = (rows, cols)
    if smallest_shape is None:
        raise ValueError(f'No readable images found in {path!r}')
    return smallest_shape

# Find the smallest image (by diagonal) across the train, validation and test
# splits. The running minimum is initialised once, before the split loop, so
# the final report covers every split instead of only the last one visited.
smallest_diagonal = float('inf')
smallest_image = None
for path in [relative_path_train, relative_path_valid, relative_path_test]:
    for class_dir in os.listdir(path):
        dir_path = os.path.join(path, class_dir)
        # The helper lists a directory; skip stray files at split level.
        if not os.path.isdir(dir_path):
            continue
        # The helper returns (shape[0], shape[1]) of an OpenCV image, which is
        # rows (height) first and columns (width) second.
        smallest_image_height, smallest_image_width = calculate_smallest_image_in_folder(dir_path)
        actual_diagonal = np.sqrt(smallest_image_width**2 + smallest_image_height**2)
        if actual_diagonal < smallest_diagonal:
            smallest_diagonal = actual_diagonal
            # Placeholder array that only carries the shape of the smallest image.
            smallest_image = np.zeros((smallest_image_height, smallest_image_width, 3))
            print(f'Smallest image in {dir_path} has width {smallest_image_width} and height {smallest_image_height}')

if smallest_image is None:
    raise ValueError('No images were found in any of the dataset folders')
print('Smallest image has dimensions', smallest_image.shape) ## Too big for our purposes
print('Smallest image has diagonal', smallest_diagonal)


In [None]:
# Input resolution fed to the network. The size of the smallest dataset image
# (smallest_image.shape[0] / smallest_image.shape[1]) is deliberately not used;
# a fixed 64x64 target is used instead.
width, height = 64, 64

In [None]:
# Dataframe column holding the image file names, and the five label columns
# used as multilabel targets.
filename_column = 'filename'
columns = ['animals', 'car', 'cycle', 'person', 'water']

In [None]:
# Load the training labels: the JSON maps each file name to its label vector.
with open('one_hot_train_augmented_by_folders.json', 'r') as labels_file:
    train_data = json.load(labels_file)

# One (filename, vector) tuple per image
data_train_list = [(name, vector) for name, vector in train_data.items()]

# Two-column frame first: file name plus the raw vector
df_train = pd.DataFrame(data_train_list)
df_train.columns = [filename_column, 'One-Hot Vector']

# Expand the vector into one column per label, then discard the raw vector
label_frame = pd.DataFrame(df_train['One-Hot Vector'].tolist())
df_train[columns] = label_frame
df_train = df_train.drop(columns=['One-Hot Vector'])

# Display the resulting DataFrame
df_train

In [None]:
# Load the validation labels: the JSON maps each file name to its label vector.
with open('one_hot_valid_by_folders.json', 'r') as labels_file:
    valid_data = json.load(labels_file)

# One (filename, vector) tuple per image
data_valid_list = [(name, vector) for name, vector in valid_data.items()]

# Two-column frame first: file name plus the raw vector
df_valid = pd.DataFrame(data_valid_list)
df_valid.columns = [filename_column, 'One-Hot Vector']

# Expand the vector into one column per label, then discard the raw vector
label_frame = pd.DataFrame(df_valid['One-Hot Vector'].tolist())
df_valid[columns] = label_frame
df_valid = df_valid.drop(columns=['One-Hot Vector'])

# Display the resulting DataFrame
df_valid

In [None]:

# Load the test labels: the JSON maps each file name to its label vector.
with open('one_hot_test_by_folders.json', 'r') as labels_file:
    test_data = json.load(labels_file)

# One (filename, vector) tuple per image
data_test_list = [(name, vector) for name, vector in test_data.items()]

# Two-column frame first: file name plus the raw vector
df_test = pd.DataFrame(data_test_list)
df_test.columns = [filename_column, 'One-Hot Vector']

# Expand the vector into one column per label, then discard the raw vector
label_frame = pd.DataFrame(df_test['One-Hot Vector'].tolist())
df_test[columns] = label_frame
df_test = df_test.drop(columns=['One-Hot Vector'])

# Normalise boolean labels to integers (False -> 0, True -> 1)
df_test = df_test.replace({False: 0, True: 1})

# Display the resulting DataFrame
df_test


In [None]:
# Both generators only rescale pixel values by 1/255; no augmentation is
# configured here.
pixel_scale = 1./255.
datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:

# Training batches: 32 shuffled images per step, with the raw label columns
# as targets and every image resized to (width, height).
train_flow_options = dict(
    dataframe=df_train,
    directory=relative_path_train,
    x_col=filename_column,
    y_col=columns,
    target_size=(width, height),
    class_mode="raw",
    batch_size=32,
    shuffle=True,
    seed=42,
)
train_generator = datagen.flow_from_dataframe(**train_flow_options)

In [None]:
# Validation batches: 32 images per step with the raw label columns as targets.
# shuffle is disabled on purpose: validation_steps is computed with floor
# division, so a shuffled generator would drop a different remainder of images
# every epoch and make the monitored val_f1_score vary for reasons unrelated to
# the model. A fixed order evaluates the same images each epoch.
valid_generator = test_datagen.flow_from_dataframe(
    dataframe=df_valid,
    directory=relative_path_valid,
    x_col=filename_column,
    y_col=columns,
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode="raw",
    target_size=(width, height),
)

In [None]:
# Test images are served one at a time, unshuffled and without targets
# (class_mode=None), so predictions come back in dataframe order.
test_flow_options = dict(
    dataframe=df_test,
    directory=relative_path_test,
    x_col=filename_column,
    target_size=(width, height),
    class_mode=None,
    batch_size=1,
    shuffle=False,
    seed=42,
)
test_generator = test_datagen.flow_from_dataframe(**test_flow_options)

In [None]:
# train = [(load_image(relative_path_train + i), train_data[i]) for i in train_data]
# valid= [(load_image(relative_path_valid + i), valid_data[i]) for i in valid_data]
# test = [(load_image(relative_path_test + i), test_data[i]) for i in test_data]


In [None]:
# train_images = np.array([t[0] for t in train], dtype='float32')
# train_labels = np.array([t[1] for t in train], dtype='float32')
# valid_images = np.array([t[0] for t in valid],  dtype='float32')
# valid_labels = np.array([t[1] for t in valid], dtype='float32')
# test_images = np.array([t[0] for t in test],  dtype='float32')
# test_labels = np.array([t[1] for t in test], dtype='float32')

### 3.3 Definición de la arquitectura del modelo y sus hiperparámetros

En esta sección definimos cada uno de los hiperparámetros de la red, sus callbacks y su arquitectura.

Esta sección acaba con el entrenamiento del modelo.

In [None]:
# Number of whole batches per pass over each generator (any remainder smaller
# than one batch is dropped by the floor division).
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST = (
    generator.n // generator.batch_size
    for generator in (train_generator, valid_generator, test_generator)
)

In [None]:
# Number of samples in each generator; used as the step count when predicting
# with one image per batch.
PREDICT_SIZE_TRAIN, PREDICT_SIZE_TEST = train_generator.n, test_generator.n

In [None]:
# All three callbacks watch the validation F1 score and treat higher as better.
f1_monitor = {"monitor": "val_f1_score", "mode": "max"}

# Write the full model (not just the weights) whenever the monitored score improves.
save_best_model_callback = ModelCheckpoint(
    filepath="best_model.h5",
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
    **f1_monitor,
)

# Stop after 8 epochs without improvement and roll back to the best weights.
early_stopping_callback = EarlyStopping(
    patience=8,
    restore_best_weights=True,
    verbose=1,
    **f1_monitor,
)

# Multiply the learning rate by 0.2 after 3 epochs without improvement.
reduce_lr_callback = ReduceLROnPlateau(factor=0.2, patience=3, **f1_monitor)

In [None]:
# Number of filters used by each of the four convolutional stages
(first_layer_filters,
 second_layer_filters,
 third_layer_filters,
 fourth_layer_filters) = 32, 64, 128, 256

# Kernel size of each convolutional stage
(kernel_size_first_layer,
 kernel_size_second_layer,
 kernel_size_third_layer,
 kernel_size_fourth_layer) = (3, 3), (5, 5), (3, 3), (5, 5)

# Pooling window applied at the end of every stage
max_pooling_size = (2, 2)

In [None]:
# Build, compile and train the CNN: four stages of two stacked convolutions
# (each followed by batch normalisation and max pooling), two L2-regularised
# dense layers and a 5-unit sigmoid output (one independent probability per label).
conv_stages = [
    (first_layer_filters, kernel_size_first_layer),
    (second_layer_filters, kernel_size_second_layer),
    (third_layer_filters, kernel_size_third_layer),
    # The fourth stage uses its own kernel-size setting instead of reusing the
    # second stage's constant, so changing kernel_size_fourth_layer takes effect.
    (fourth_layer_filters, kernel_size_fourth_layer),
]

with tf.device('/device:GPU:0'):
    model = Sequential()
    for stage_index, (stage_filters, stage_kernel) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        input_kwargs = {"input_shape": (width, height, 3)} if stage_index == 0 else {}
        model.add(Conv2D(stage_filters, stage_kernel, padding='same', activation='relu', **input_kwargs))
        model.add(Conv2D(stage_filters, stage_kernel, padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(max_pooling_size))

    model.add(Flatten())
    model.add(Dense(width*width, kernel_initializer="random_normal", activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dense(256, kernel_initializer="random_normal", activation='relu', kernel_regularizer=l2(0.01)))

    model.add(Dense(5, activation='sigmoid'))

    # threshold=0.5 makes the F1 metric binarise each label independently,
    # which is what a multilabel problem needs.
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[tfa.metrics.F1Score(average="macro", num_classes=5, threshold=0.5)])

    history = model.fit(
        x=train_generator,
        steps_per_epoch=STEP_SIZE_TRAIN,
        validation_data=valid_generator,
        validation_steps=STEP_SIZE_VALID,
        epochs=40,
        callbacks=[save_best_model_callback, early_stopping_callback, reduce_lr_callback],
    )


In [None]:
# Show which metric series were recorded during training.
history.history.keys()

### 3.4 Visualización de la evolución de las métricas

En esta sección se implementan las gráficas de evolución de las métricas de interés de nuestro modelo.

In [None]:
# Training vs. validation loss per epoch
for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation F1 score per epoch
for series_name in ('f1_score', 'val_f1_score'):
    plt.plot(history.history[series_name])
plt.title('model f1_score')
plt.xlabel('epoch')
plt.ylabel('f1_score')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

### 3.5 Guardado del modelo

Esta sección trata la implementación del guardado del modelo generado por el entrenamiento en un archivo .h5.

In [None]:
# Persist the model in its current in-memory state to an .h5 file.
model.save('model_convolucionesApiladas_2densas.h5')

### 3.6 Generación de las predicciones


Con el modelo ya entrenado, falta generar las predicciones que realiza el modelo sobre los datos de train y test. Para ello se realizan varios pasos:

1. Se utiliza un nuevo generador que utilice lotes de una imagen para que los resultados de las métricas no vengan condicionados por el tamaño de lote. El objetivo principal es quedarse con la métrica evaluada habiendo hecho la predicción de las imágenes una por una.
2. Se utiliza la función model.predict pasándole todos los datos de train por un lado y todos los datos de test por otro. Esto genera la probabilidad de pertenencia a cada una de las clases definidas en la etiqueta por cada imagen.
3. Se define un umbral de clasificación a partir del cual consideraremos que pertenece a una clase u otra.



In [None]:
# Single-image batches over the training set, used only for prediction.
# shuffle MUST be False here: the predictions are later compared row by row
# with the labels taken from df_train in dataframe order, so a shuffled
# generator would pair every prediction with the labels of a different image
# and invalidate the train metrics and confusion matrices.
train_generator_to_predict = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=relative_path_train,
    x_col=filename_column,
    y_col=columns,
    batch_size=1,
    seed=42,
    shuffle=False,
    class_mode="raw",
    target_size=(width, height),
)

In [None]:
# Predict every test image, one per step, starting from the first sample.
test_generator.reset()
pred_test = model.predict(x=test_generator, steps=PREDICT_SIZE_TEST, verbose=1)

# Same for every training image.
train_generator_to_predict.reset()
pred_train = model.predict(x=train_generator_to_predict, steps=PREDICT_SIZE_TRAIN, verbose=1)

En este caso se ha decidido utilizar un umbral de 0.5. No se usa un umbral mayor porque no necesitamos que el modelo esté completamente seguro de la pertenencia a una clase para asignarla. Del mismo modo, no se establece un umbral menor para no sesgar la evaluación: con 0.5 se da el mismo margen a la pertenencia a una clase que a su no pertenencia.

In [None]:
# Turn the predicted probabilities into 0/1 labels: strictly above the
# threshold counts as belonging to the class.
decision_threshold = 0.5
pred_bool_test = (pred_test > decision_threshold).astype(int)
pred_bool_train = (pred_train > decision_threshold).astype(int)

In [None]:
# Every column except the file name is a label column.
columns_to_encode = [name for name in df_train.columns if name != filename_column]

# Work on copies so the original dataframes keep their dtypes.
df_train2 = df_train.copy()
df_test2 = df_test.copy()

# Cast the label columns to integers
df_train2[columns_to_encode] = df_train2[columns_to_encode].astype(int)
df_test2[columns_to_encode] = df_test2[columns_to_encode].astype(int)

# Ground-truth label matrices, one row per image in dataframe order
train_labels = df_train2[columns_to_encode].to_numpy()
test_labels = df_test2[columns_to_encode].to_numpy()

### 3.7 Generación y visualización de la matriz de confusión
En esta sección se implementa el código para generar las matrices de confusión y su visualización. Estas matrices se generarán tanto para los datos de test como para los de train.

In [None]:
def plot_confusion_matrix(conf_matrices,type='train'):
    """Plot one row-normalised 2x2 confusion matrix per label.

    Args:
        conf_matrices: Iterable of 2x2 confusion matrices, one per label, in
            the same order as the module-level ``columns`` list (which
            provides the subplot titles).
        type: Text appended to each subplot title (e.g. 'train' or 'test').
            The name shadows the builtin but is kept for existing callers.
    """
    conf_matrices = list(conf_matrices)
    # Two plots per row; grow the grid with the number of matrices instead of
    # assuming at most six (ceil division, at least one row).
    n_rows = max(1, -(-len(conf_matrices) // 2))

    plt.figure(figsize=(15, 12))
    sns.set(font_scale=1.2)  # Adjust font size if needed
    for i, conf_matrix in enumerate(conf_matrices, 1):
        plt.subplot(n_rows, 2, i)
        # Normalise each row by its total. A true class with no samples has a
        # zero total; leave that row at 0 instead of dividing by zero (NaN).
        row_totals = conf_matrix.sum(axis=1, keepdims=True)
        conf_matrix_normalized = np.divide(
            conf_matrix.astype('float'),
            row_totals,
            out=np.zeros(conf_matrix.shape, dtype=float),
            where=row_totals != 0,
        )
        sns.heatmap(conf_matrix_normalized, annot=True, fmt=".2%", cmap="Blues", cbar=False,
                    xticklabels=["Class 0", "Class 1"],
                    yticklabels=["Class 0", "Class 1"])
        plt.title(f"Confusion Matrix {columns[i-1]} "+type)
        plt.xlabel("Predicted Label")
        plt.ylabel("True Label")

    plt.tight_layout()
    plt.show()

In [None]:
# One confusion matrix per label, computed on the training predictions.
plot_confusion_matrix(multilabel_confusion_matrix(train_labels, pred_bool_train),type='train')

In [None]:
# One confusion matrix per label, computed on the test predictions.
plot_confusion_matrix(multilabel_confusion_matrix(test_labels, pred_bool_test),type='test')

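Se puede observar que hay underfitting por parte del modelo, ya que parece que confunde más las imágenes modificadas (aumentadas), mientras que acierta de manera considerable las imágenes originales. Esto lo vemos por la diferencia de resultados en train y en test. Nota: esta comparación solo es válida si las predicciones de train se generan en el mismo orden que las etiquetas de `df_train`, es decir, con un generador sin barajado (`shuffle=False`); de lo contrario, las métricas de train no son fiables.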

### 3.8 Evaluación de las métricas del modelo
A continuación, se calculan las métricas asociadas al modelo. Concretamente, se muestran por pantalla los resultados de las métricas en tres niveles:
1. A nivel de cada clase en las etiquetas.
2. Utilizando el valor de 'macro' para hacer una media aritmética de todas las clases dentro de la etiqueta.
3. Utilizando el valor 'weighted' que hace una media ponderada de cada una de las clases dentro de la etiqueta.

#### 3.8.1 Métricas de cada clase en la etiqueta

In [None]:
# Per-label scores: average=None returns one value per label column.
average_mode = None

# Training split
accuracy_train = accuracy_score(train_labels, pred_bool_train)
f1_train = f1_score(train_labels, pred_bool_train, average=average_mode)
precision_train = precision_score(train_labels, pred_bool_train, average=average_mode)
recall_train = recall_score(train_labels, pred_bool_train, average=average_mode)

# Test split
accuracy_test = accuracy_score(test_labels, pred_bool_test)
f1_test = f1_score(test_labels, pred_bool_test, average=average_mode)
precision_test = precision_score(test_labels, pred_bool_test, average=average_mode)
recall_test = recall_score(test_labels, pred_bool_test, average=average_mode)

# Report everything, training first
report_rows = [
    ("Accuracy train:", accuracy_train),
    ("F1 Score train:", f1_train),
    ("Precision train:", precision_train),
    ("Recall train:", recall_train),
    ("Accuracy test:", accuracy_test),
    ("F1 Score test:", f1_test),
    ("Precision test:", precision_test),
    ("Recall test:", recall_test),
]
for report_label, report_value in report_rows:
    print(report_label, report_value)

#### 3.8.2 Métricas calculadas de forma MACRO (media aritmética)

In [None]:
# Macro scores: unweighted mean of the per-label values.
average_mode = 'macro'

# Training split
accuracy_train = accuracy_score(train_labels, pred_bool_train)
f1_train = f1_score(train_labels, pred_bool_train, average=average_mode)
precision_train = precision_score(train_labels, pred_bool_train, average=average_mode)
recall_train = recall_score(train_labels, pred_bool_train, average=average_mode)

# Test split
accuracy_test = accuracy_score(test_labels, pred_bool_test)
f1_test = f1_score(test_labels, pred_bool_test, average=average_mode)
precision_test = precision_score(test_labels, pred_bool_test, average=average_mode)
recall_test = recall_score(test_labels, pred_bool_test, average=average_mode)

# Report everything, training first
report_rows = [
    ("Accuracy train:", accuracy_train),
    ("F1 Score train:", f1_train),
    ("Precision train:", precision_train),
    ("Recall train:", recall_train),
    ("Accuracy test:", accuracy_test),
    ("F1 Score test:", f1_test),
    ("Precision test:", precision_test),
    ("Recall test:", recall_test),
]
for report_label, report_value in report_rows:
    print(report_label, report_value)


#### 3.8.3 Métricas calculadas de forma Weighted (media ponderada)

In [None]:
# Weighted scores: per-label values averaged by the number of true instances
# of each label.
average_mode = 'weighted'

# Training split
accuracy_train = accuracy_score(train_labels, pred_bool_train)
f1_train = f1_score(train_labels, pred_bool_train, average=average_mode)
precision_train = precision_score(train_labels, pred_bool_train, average=average_mode)
recall_train = recall_score(train_labels, pred_bool_train, average=average_mode)

# Test split
accuracy_test = accuracy_score(test_labels, pred_bool_test)
f1_test = f1_score(test_labels, pred_bool_test, average=average_mode)
precision_test = precision_score(test_labels, pred_bool_test, average=average_mode)
recall_test = recall_score(test_labels, pred_bool_test, average=average_mode)

# Report everything, training first
report_rows = [
    ("Accuracy train:", accuracy_train),
    ("F1 Score train:", f1_train),
    ("Precision train:", precision_train),
    ("Recall train:", recall_train),
    ("Accuracy test:", accuracy_test),
    ("F1 Score test:", f1_test),
    ("Precision test:", precision_test),
    ("Recall test:", recall_test),
]
for report_label, report_value in report_rows:
    print(report_label, report_value)

Es de esperar que la media ponderada según el número de apariciones de cada una de las clases sea mayor, puesto que mayoritariamente aparecen personas en todas las imágenes. Por este motivo, consideramos que la media aritmética es más representativa de la capacidad de la red neuronal.

### 3.9 Guardado de resultado en el backend de Mlflow

En este apartado se indica el código a través del cual cargamos el modelo y especificamos todos los hiperparámetros utilizados por la red neuronal para guardarlo en el backend de Mlflow.

Es importante resaltar que como el modelo ha sido entrenado en GPU, al cargarlo con el parámetro "compile" con valor True, mlflow trata de buscar en las secciones de memoria RAM equivocadas y no carga de forma correcta. Como nuestro objetivo no es guardar el modelo en mlflow, se ha decidido dejar con el compile a False para tener solo una referencia al modelo pero no su archivo final.

In [None]:
# Reload the saved .h5 model for MLflow logging; compile=False skips restoring
# the training configuration (loss, optimizer, metrics).
model2=keras.models.load_model('./model_convolucionesApiladas_2densas.h5',compile=False)

In [None]:
# Select the experiment that receives the run.
multilabel_experiment = mlflow.set_experiment("Multilabel_Model")

# Define a run name for this iteration of training.
# If this is not set, a unique name will be auto-generated for your run.
run_name = "multilabel_model_2dense_rf_test"

# Define an artifact path that the model will be saved to.
artifact_path = "ar_2dense_rf_test"

params = {
    "kernel_size_first_layer": kernel_size_first_layer,
    "neurons_first_layer": first_layer_filters,
    "kernel_size_second_layer": kernel_size_second_layer,
    "neurons_second_layer": second_layer_filters,
    "kernel_size_third_layer": kernel_size_third_layer,
    "neurons_third_layer": third_layer_filters,
    "kernel_size_fourth_layer": kernel_size_fourth_layer,
    "neurons_fourth_layer": fourth_layer_filters,
    "first_big_dense_layer": 3,
    "second_big_dense_layer": 2,
    "third_big_dense_layer": 3,
    "fourth_big_dense_layer": 3,
    "neurons_dense_layer": width*width,
    "neurons_dense_layer2": 256,
    "neurons_dense_layer3": 5,
    "activation": "relu",
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "image_size": (width,height),
    "batch_size": 32,
    "epochs": 40,
    # Params are stored as strings: log the callback class names rather than
    # the objects, whose default repr is just a memory address.
    "callbacks": [
        type(training_callback).__name__
        for training_callback in (
            save_best_model_callback,
            early_stopping_callback,
            reduce_lr_callback,
        )
    ],
    "initializer": "random_normal",
    "regularizer": "l2(0.01)"
}

# Named test_metrics so it does not shadow the `metrics` module imported from
# tensorflow.keras at the top of the notebook.
# NOTE(review): these values come from whichever metrics cell ran last (the
# averaging mode differs between cells); confirm the intended one is logged.
test_metrics = {"f1_test": f1_test, "precision_test": precision_test, "recall_test": recall_test}

with mlflow.start_run(run_name=run_name) as run:
    # Log the parameters used for the model fit
    mlflow.log_params(params)

    # Log the metrics that were calculated on the test predictions
    mlflow.log_metrics(test_metrics)

    # Log an instance of the trained model for later use, under the artifact
    # path defined above
    mlflow.keras.log_model(model2, artifact_path)

## 4. Optuna - Extra

Una de las tantas modificaciones que se añadieron a la red original fue el uso del optimizador de hiperparámetros Optuna. No obstante, realizar pruebas con distintos hiperparámetros sigue siendo lento por la complejidad de la red establecida. Aunque sus resultados no han sido definitivos para entregar el modelo final, sí que se considera que aportan valor a un futuro refinamiento de la red. Es por este motivo por lo que se aporta el código como una modificación añadida.

In [None]:

# Fresh training generator for the Optuna search: 32 shuffled images per step,
# raw label columns as targets, images resized to (width, height).
optuna_train_flow_options = dict(
    dataframe=df_train,
    directory=relative_path_train,
    x_col=filename_column,
    y_col=columns,
    target_size=(width, height),
    class_mode="raw",
    batch_size=32,
    shuffle=True,
    seed=42,
)
train_generator = datagen.flow_from_dataframe(**optuna_train_flow_options)

In [None]:
# Module-level slots shared by the Optuna objective (writes `model`) and the
# study callback (writes `best_model`); both start empty.
best_model = model = None

In [None]:
def create_model(trial, min_neurons=1, max_neurons=6, max_layers=6):
    """Build an uncompiled CNN whose hyperparameters are sampled from a trial.

    The network has three convolutional stages (two stacked convolutions,
    batch normalisation and 2x2 max pooling each), a dense layer of ``width``
    units with L2 regularisation and a 5-unit sigmoid output. ``width`` and
    ``height`` are read from module level.

    Args:
        trial: Optuna trial used to sample ``weight_decay``, ``kernel_size``
            and ``n_units_0`` .. ``n_units_2``.
        min_neurons: Lower bound for the number of filters of each stage.
        max_neurons: Upper bound for the number of filters of each stage.
        max_layers: Accepted for compatibility; the number of stages is fixed
            and this value is not used.

    Returns:
        The assembled ``Sequential`` model.
    """
    # Stages added after the first one
    extra_stages = 2

    # L2 regularisation strength
    weight_decay = trial.suggest_float("weight_decay", 1e-3, 1e-1)

    network = Sequential()

    # Square kernel shared by every convolution (odd sizes from 3 to 9)
    kernel_side = trial.suggest_int("kernel_size", 3, 9, step=2)
    kernel_shape = (kernel_side, kernel_side)

    for stage in range(extra_stages + 1):
        stage_filters = trial.suggest_int(f"n_units_{stage}", min_neurons, max_neurons)
        if stage == 0:
            # Only the first layer declares the input shape
            network.add(Conv2D(stage_filters, kernel_shape, padding='same', activation='relu', input_shape=(width,height,3)))
        else:
            network.add(Conv2D(stage_filters, kernel_shape, activation='relu', padding='same'))
        network.add(Conv2D(stage_filters, kernel_shape, padding='same', activation='relu'))
        network.add(BatchNormalization())
        network.add(MaxPooling2D((2, 2)))

    # Classifier head
    network.add(Flatten())
    network.add(Dense(width, activation='relu', kernel_regularizer=l2(weight_decay)))
    network.add(Dense(5, activation='sigmoid'))
    return network

In [None]:
def create_optimizer(trial):
    """Return an RMSprop or Adam optimizer configured from an Optuna trial.

    Args:
        trial: Optuna trial used to pick the optimizer and its hyperparameters.

    Returns:
        An instance of the chosen class looked up on ``tf.optimizers``.
    """
    optimizer_selected = trial.suggest_categorical("optimizer", ["RMSprop", "Adam"])

    # https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/experimental/RMSprop
    if optimizer_selected == "RMSprop":
        optimizer_options = {
            "learning_rate": trial.suggest_float("rmsprop_learning_rate", 1e-3, 1e-1),
            "momentum": trial.suggest_float("rmsprop_momentum", 1e-3, 1e-1),
        }
    # https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
    elif optimizer_selected == "Adam":
        optimizer_options = {
            "learning_rate": trial.suggest_float("adam_learning_rate", 1e-2, 1e-1),
        }
    else:
        optimizer_options = {}

    optimizer_class = getattr(tf.optimizers, optimizer_selected)
    return optimizer_class(**optimizer_options)

In [None]:
def callback(study, trial):
    """Remember the model of the best trial seen so far.

    Meant to be passed to ``study.optimize``. When the trial that just
    finished is the study's best trial, the module-level ``model`` is stored
    in the module-level ``best_model``.

    Args:
        study: Study exposing ``best_trial``.
        trial: The trial that has just finished.
    """
    global best_model
    if not (study.best_trial == trial):
        return
    best_model = model


In [None]:
def objective(trial):
    """Train and score one Optuna trial of the CNN.

    Builds a model and an optimizer from ``trial``, trains for up to 20 epochs
    with checkpoint, early-stopping and learning-rate callbacks that monitor
    ``val_f1_score``, and scores the result on the test generator.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Macro-averaged F1 score of the thresholded (0.5) test predictions
        against the module-level ``test_labels``.
    """
    # The model is published as a global so the study callback can keep a
    # reference to the best one.
    global model
    save_best_model_callback = ModelCheckpoint(
        filepath="best_model_optuna.h5",
        monitor="val_f1_score",
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        verbose=1,
    )
    early_stopping_callback = EarlyStopping(
        monitor="val_f1_score",  # Choose the metric to monitor for improvement
        patience=8,  # Number of epochs with no improvement after which training will be stopped
        restore_best_weights=True,  # Restore model weights from the epoch with the best value of the monitored quantity
        verbose=1,
        mode="max",
    )
    reduce_lr_callback = ReduceLROnPlateau(
        monitor="val_f1_score", factor=0.2, patience=4, mode="max"
    )
    # Build the model
    model = create_model(trial, max_layers=3, max_neurons=128, min_neurons=32)

    # Get the optimizer
    optimizer = create_optimizer(trial)

    # Compile the model. threshold=0.5 is required for multilabel targets:
    # without it the metric keeps only the arg-max class per sample, so the
    # monitored val_f1_score would not reflect multilabel performance.
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=[tfa.metrics.F1Score(average="macro", num_classes=5, threshold=0.5)],
    )
    with tf.device("/device:GPU:0"):
        # Train the model
        model.fit(
            x=train_generator,
            steps_per_epoch=STEP_SIZE_TRAIN,
            validation_data=valid_generator,
            validation_steps=STEP_SIZE_VALID,
            epochs=20,
            callbacks=[
                save_best_model_callback,
                early_stopping_callback,
                reduce_lr_callback,
            ],
        )

    # Predict the test set from its first sample and binarise at 0.5
    test_generator.reset()
    pred_test = model.predict(x=test_generator, steps=PREDICT_SIZE_TEST)
    pred_bool_test = (pred_test > 0.5).astype(int)

    # NOTE(review): the search is scored on the test split, so the selected
    # hyperparameters are tuned to the test data; consider scoring on
    # validation data instead.
    return f1_score(test_labels, pred_bool_test, average="macro")

In [None]:
# Module-level callbacks for the Optuna section; all watch the validation F1
# score and treat higher as better.
optuna_f1_monitor = {"monitor": "val_f1_score", "mode": "max"}

# Write the full model (not just the weights) whenever the monitored score improves.
save_best_model_callback = ModelCheckpoint(
    filepath='best_model_optuna.h5',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
    **optuna_f1_monitor,
)

# Stop after 8 epochs without improvement and roll back to the best weights.
early_stopping_callback = EarlyStopping(
    patience=8,
    restore_best_weights=True,
    verbose=1,
    **optuna_f1_monitor,
)

# Multiply the learning rate by 0.2 after 4 epochs without improvement.
reduce_lr_callback = ReduceLROnPlateau(factor=0.2, patience=4, **optuna_f1_monitor)

In [None]:
# Run the hyperparameter search: 15 trials maximising the value returned by
# `objective`; `callback` runs after each trial. `objective` already takes the
# trial as its only argument, so it is passed directly.
study = optuna.create_study(direction="maximize", study_name="MLP Tensorflow")
study.optimize(objective, n_trials=15, callbacks=[callback])

In [None]:
# best_model.save('best_model_optuna.h5')