# Notebook para pruebas de train en Colab

## Inicialización

In [1]:
# Importación de librerías
# Gestión de archivos y reporte
import os
import shutil
import logging
import yaml

# Manipulación y análisis de datos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Procesamiento de imágenes
from PIL import Image

# Machine Learning
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

2025-04-21 23:07:34.360630: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-21 23:07:34.401133: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745287654.416027 1367172 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745287654.422415 1367172 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745287654.459966 1367172 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

### Carga de datos almacenados

In [2]:
# Cargamos el dataframe desde el .CSV y definimos 'id' como índice
try:
    df_split = pd.read_csv('dataframe_splitted.csv').set_index('id')
except FileNotFoundError:
    print(f"⚠️ Error: El archivo 'dataframe.csv' no se encontró en la ubicación actual: {os.getcwd()}")
    print("🚨 Se creará nuevamente al correr las celdas de 'Importación de imágenes' 🚨.")
    df_split = None
except Exception as e:
    print(f"Ocurrió un error al leer el archivo CSV: {e}")
    df_split = None

In [3]:
df_split.head()

Unnamed: 0_level_0,image_path,filename,class,group,tag,split
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Strawberry___healthy/,8f558908-aa1b-4a86-855a-5094c2392e5a___RS_HL 1...,Strawberry___healthy,Strawberry,healthy,train
1,Strawberry___healthy/,b8e9ed27-8e37-4214-9206-f8c0ef21cf4d___RS_HL 4...,Strawberry___healthy,Strawberry,healthy,test
2,Strawberry___healthy/,abdd34a0-ab02-41e0-95a3-a014ab863ec2___RS_HL 1...,Strawberry___healthy,Strawberry,healthy,train
3,Strawberry___healthy/,d1aee44a-b6bb-45b9-b7b6-5d553add8fd1___RS_HL 2...,Strawberry___healthy,Strawberry,healthy,train
4,Strawberry___healthy/,3d28c3ea-8419-4e09-addd-211e3828e39f___RS_HL 1...,Strawberry___healthy,Strawberry,healthy,train


In [4]:
# Cargar variables desde el archivo YAML
try:
    # Verificar si el archivo YAML existe
    yaml_filename = "constants.yaml"
    with open(yaml_filename, "r") as yaml_file:
        constants_data = yaml.safe_load(yaml_file)

    # Acceder a las variables
    ROOT_DIR = constants_data.get("ROOT_DIR")
    DATASET_PATH = constants_data.get("DATASET_PATH")
    SPLITTED_PATH = constants_data.get("SPLITTED_PATH")

    print(f"✅ Se han cargado las variables de configuración desde '{yaml_filename}'")
    print(f" - ROOT_DIR: {ROOT_DIR}")
    print(f" - DATASET_PATH: {DATASET_PATH}")
    print(f" - SPLITTED_PATH: {SPLITTED_PATH}")
except FileNotFoundError:
    print(f"Error: El archivo 'constants.yaml' no se encontró en la ubicación actual: {os.getcwd()}")
    print("Se creará nuevamente al correr el notebook.")
    ROOT_DIR = None
    DATASET_PATH = None
    SPLITTED_PATH = None
except Exception as e:
    print(f"Ocurrió un error al leer el archivo YAML: {e}")

✅ Se han cargado las variables de configuración desde 'constants.yaml'
 - ROOT_DIR: /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/versions/3/plantvillage dataset/color
 - DATASET_PATH: /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/versions/3
 - SPLITTED_PATH: /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/


### Funciones necesarias

In [5]:
import os, re

# Busca la carpeta raíz del dataset en el directorio donde fue descargado
def find_path(folder):
    match = re.search(fr"^(.*?)/{folder}/", DATASET_PATH)
    if match:
        prefix = match.group(1)
        path = os.path.join(prefix, f"{folder}/")
        return path
    else:
        print(f'No se ha podido encontrar la carpeta "{folder}" en {DATASET_PATH}')
        return None

In [6]:
# Carga de imagenes en memoria y visualización
def load_image(data: pd.DataFrame, index: int, root: str=ROOT_DIR):
    """
    Carga una imagen PIL desde una fila específica de un DataFrame.

    Args:
        dataframe (pandas.DataFrame): El DataFrame que contiene las rutas de las imágenes.
        index (int): El índice de la fila en el DataFrame para cargar la imagen.
        root_dir (str): El directorio raíz donde se encuentran las imágenes.

    Returns:
        PIL.Image.Image: La imagen cargada como un objeto PIL.Image, o None si ocurre un error.
    """
    if index < 0 or index >= len(data):
        print("Índice fuera de rango.")
        return None

    row = data.iloc[index]
    relative_path = row['image_path']
    filename = row['filename']
    full_path = os.path.join(root, relative_path, filename)

    try:
        img = Image.open(full_path)
        return img
    except FileNotFoundError:
        print(f"Archivo no encontrado: {full_path}")
        return None
    except Exception as e:
        print(f"Error al cargar la imagen: {e}")
        return None

#### Descarga de dataset de Kaggle

In [7]:
import kagglehub

# Download latest version
DATASET_PATH = kagglehub.dataset_download("abdallahalidev/plantvillage-dataset")

print("Path to dataset files:", DATASET_PATH)

Path to dataset files: /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/versions/3


Decidimos en principio trabajar con el dataset con imágenes a color por ser el que contiene mayor información.

In [8]:
# Ruta de acceso al dataset
ROOT_DIR = f'{DATASET_PATH}/plantvillage dataset/color'

# Dataset split

#### Funciones

In [9]:
def dataset_already_exists(path_to_check: str) -> bool | None:
    """
    Verifica si el directorio especificado existe y está vacío.

    Args:
        path_to_check (str): Ruta del directorio a verificar.

    Returns:
        bool: True si el directorio existe y está vacío, False en caso contrario.
    """
    if not os.path.exists(path_to_check):
        # El directorio no existe -> Crea el directorio
        #print(f"☑️ El directorio no existe, aún no ha sido creado:\n > {path_to_check}") # Debugging
        return False # No realiza ninguna acción
    else:
        # Verificar si el directorio está vacío
        try:
            # Explora el contenido del directorio
            content = os.listdir(path_to_check)
            #print(content) # Debugging

            # Si el directorio está vacío, se puede eliminar directamente
            #       -> Elimina sin confirmación
            if not content:
                os.rmdir(path_to_check) # Elimina el directorio vacío
                print(f"☑️ El directorio estaba vacío y se ha eliminado de forma automática:\n > {path_to_check}\n")
                return False

            # Si el directorio contiene sólo archivos ocultos (de sistema)
            #       -> Elimina sin confirmación
            elif all([file.startswith('.') for file in content]):
                shutil.rmtree(path_to_check) # Elimina el directorio y su contenido
                print(f"☑️ El directorio sólo contenía archivos ocutlos, por lo que se ha eliminado de forma automática:\n > {path_to_check}\n")
                return False

            # Si hay archivos visibles en el directorio (dataset ya existe)
            #       -> Solicita permiso para eliminarlos
            else:
                # Input de confirmación del usuario
                confirmacion = input(f"⚠️ El directorio especificado ya existe y contiene archivos. ¿Deseas eliminar todo su contenido y el directorio en sí? [Y/N]: '{path_to_check}'").strip().lower()
                # Verifica la respuesta del usuario
                if confirmacion == 'y':
                    shutil.rmtree(path_to_check) # Elimina el directorio y su contenido
                    print(f"✅ El directorio y su contenido han sido eliminados exitosamente:\n > {path_to_check}\n")
                    return False
                else:
                    print(f"⛔️ La eliminación del directorio ha sido denegada por el usuario:\n  > {path_to_check}")
                    return True

        except OSError as e:
            print(f"❌ Error al eliminar el directorio vacío en {path_to_check}: {e}\n")
            return None
        except Exception as e:
            print(f"‼️ Ocurrió un error inesperado al intentar eliminar el directorio vacío en {path_to_check}: {e}\n")
            return None

In [10]:
def create_ignore_function(df, train_label, filename_col='filename'):
    """
    Crea y devuelve la función 'ignore_files' que tiene acceso al DataFrame
    y sabe qué archivos mantener.
    """
    # Crea un conjunto (set) con los nombres de archivo que SÍ queremos copiar (ej: split == 'train')
    # Usa este conjunto para hacer la búsqueda de forma mucho más rápida
    files_to_keep = set(df[df['split'] == train_label][filename_col])
    #print(f"Archivos a mantener (split='{train_label}'): {files_to_keep}") # Debugging

    def ignore_files(current_dir, files_in_current_dir):
        """
        Función que será llamada por shutil.copytree.
        Decide qué archivos/directorios ignorar en el directorio actual.
        """
        ignore_list = []
        for item in files_in_current_dir:
            # Construye la ruta completa para verificar si es archivo o directorio
            full_path = os.path.join(current_dir, item)

            # Aplicar la lógica de ignorar SOLO los ARCHIVOS de la lista
            if os.path.isfile(full_path):
                # Si el nombre del archivo NO está en el conjunto de archivos a mantener,
                # entonces lo agrega a la lista de ignorados.
                if item not in files_to_keep:
                    # print(f"Ignorando archivo: {item} (en {current_dir})") # Debugging
                    ignore_list.append(item)

        # print(f"Directorio: {current_dir}, Ignorando: {ignore_list}") # Debugging
        return ignore_list

    # Devuelve la función 'ignore_files' configurada
    return ignore_files


## Divisón de datos

Se importa CSV con asignación de splits precalculada y se dividen las imagenes a las carpetas correspondientes.

In [11]:
df_split.sample(5)

Unnamed: 0_level_0,image_path,filename,class,group,tag,split
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
27754,Tomato___healthy/,4127b1a7-3442-4847-96cb-7ea68495039e___GH_HL L...,Tomato___healthy,Tomato,healthy,train
31801,Tomato___Tomato_Yellow_Leaf_Curl_Virus/,07621782-2709-4cfb-b4ff-1f1bad24df64___YLCV_GC...,Tomato___Tomato_Yellow_Leaf_Curl_Virus,Tomato,Tomato_Yellow_Leaf_Curl_Virus,test
16079,Orange___Haunglongbing_(Citrus_greening)/,8b92b2d2-a852-4a35-b351-942f23eefce9___CREC_HL...,Orange___Haunglongbing_(Citrus_greening),Orange,Haunglongbing_(Citrus_greening),train
29622,Cherry_(including_sour)___healthy/,db2c8b52-1f7d-4bb7-8cf1-81f241091122___JR_HL 9...,Cherry_(including_sour)___healthy,Cherry_(including_sour),healthy,train
33428,Tomato___Tomato_Yellow_Leaf_Curl_Virus/,b3fe3d8c-f0e0-405c-a67d-ac57db6be043___YLCV_GC...,Tomato___Tomato_Yellow_Leaf_Curl_Virus,Tomato,Tomato_Yellow_Leaf_Curl_Virus,train


### Construcción de carpetas

In [12]:
# Guarda directorio del dataset dividido
path = find_path("plantvillage-dataset")
SPLITTED_PATH = f"{path}splitted/" if path else None
SPLITTED_PATH

'/home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/'

In [13]:
splits = df_split['split'].value_counts().index.tolist()
splits

['train', 'test']

In [14]:
verfication = True # Ejecuta el proceso de verificación (punto 2)

print(f"Se inicia proceso de copiado del dataset…")
total_files = len(df_split) # Total de archivos del dataset
print(f" - Total de archivos en el dataset: {total_files}")

# Realiza el proceso de copiado de archivos para cada split
succeeded_process = True
for split in splits:
    # Crea las rutas de origen y destino
    # (Ejemplo: 'train', 'test', 'valid')
    print(f"\n\nIniciando proceso para '{split}' split …")
    source_folder = f'{ROOT_DIR}/'
    destination_folder = f'{SPLITTED_PATH}{split}/'
    total_split = len(df_split[df_split['split'] == split]) # Total de archivos del split
    if dataset_already_exists(destination_folder): # Verifica si el directorio existe y está vacío
        print("  ⨯ El directorio ya existe y contiene archivos, a petición del usuario se omite el proceso de copiado.")
        continue # Si el directorio ya existe, no se hace nada+
    else:
        print(f"🔄 Procesando split '{split.upper()}' ({(total_split/total_files*100):.2f}):")
        print(f"  - Total de archivos a copiar: {total_split}")
    succeeded = False

    try:
        print(f"1. Creando estructura de subcarpetas:")
        # 1. Crea la función para ignorar específica para el split a procesar
        ignore_function = create_ignore_function(df_split, train_label=split, filename_col='filename')
        print(f"    ✔ Función de filtro creada para el split ")

        # 2. Con copytree copia todo el "árbol" de directorios (careptas y subcarpetas)
        # Fitrando con ignore_function todos aquellos archivos que no pertenecen al split deseado
        print(f"    ∞ Copiando contenido del dataset (puede demorar hasta un minuto).")
        shutil.copytree(source_folder, destination_folder, ignore=ignore_function)
        print(f"    ✔ Proceso de copiado del split finalizado.")

        if verfication:
            # Verifica qué se haya copiado adecuadamente (opcional pero útil)
            print(f"2. Se inicia proceso de verificación…")
            copied_files = []
            for root, dirs, files_in_dest in os.walk(destination_folder):
                for name in files_in_dest:
                    copied_files.append(os.path.join(os.path.relpath(root, destination_folder), name).replace('\\', '/')) # Normalizar path
                    #print(f"  - {os.path.join(root, name)}") # Debuggin
            print(f"    ✔ Se crearon un total de {len(os.listdir(destination_folder))} carpetas (para las clases).")
            print(f"    ✔ Se copiaron un total de {len(copied_files)} archivos ({len(copied_files)/total_split*100:.2f}%)")
            # Agregar confirmación de igualdad cantidad split == copiados
            if len(copied_files) == total_split:
                print(f"✅ Se completó satisfactoriamente el subproceso de copiado para el split.\n")
                succeeded = True
            else:
                print(f" ❌ Error: No se pudo copiar correctamente el split '{split.upper()}'\n")
                succeeded = False
        else:
            succeeded = True # Si la verificación está desactivada, se asume que el proceso fue exitoso

    except FileExistsError:
        print(f"Error: La carpeta de destino '{destination_folder}' ya existe.\n")
        pass
    except Exception as e:
        print(f"Ocurrió un error inesperado: {e}\n")

    succeeded_process *= succeeded # Actualiza el estado del proceso
    # (Sólo es 'True' si todos los splits se copian correctamente)

if succeeded_process:
    print("\n\n🌟 El proceso de copiado del dataset ha finalizado con éxito.\n")
else:
    print("\n\n🚫 No se pudo completar satisfactoriamente el proceso de copiado del dataset.\nVerificar que se haya completado la eliminación de las carpetas.\n")

Se inicia proceso de copiado del dataset…
 - Total de archivos en el dataset: 54305


Iniciando proceso para 'train' split …
⛔️ La eliminación del directorio ha sido denegada por el usuario:
  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
  ⨯ El directorio ya existe y contiene archivos, a petición del usuario se omite el proceso de copiado.


Iniciando proceso para 'test' split …
⛔️ La eliminación del directorio ha sido denegada por el usuario:
  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
  ⨯ El directorio ya existe y contiene archivos, a petición del usuario se omite el proceso de copiado.


🌟 El proceso de copiado del dataset ha finalizado con éxito.



----
# Training model

## Dataloader

In [15]:
import tensorflow as tf

In [16]:
# Data laoders setup
def load_from_directory(data_folder):
    """
    Carga un dataset de imágenes desde un directorio específico.

    Args:
        data_folder (str): Ruta al directorio que contiene las imágenes.

    Returns:
        tf.data.Dataset: Dataset de TensorFlow con las imágenes y etiquetas.
    """
    dataset = tf.keras.preprocessing.image_dataset_from_directory(
        data_folder,  # Ruta al directorio de datos
        labels="inferred",  # Las etiquetas se infieren automáticamente desde los nombres de las carpetas
        label_mode="categorical",  # Las etiquetas se codifican como categorías (one-hot encoding)
        class_names=None,  # Las clases se detectan automáticamente
        color_mode="rgb",  # Las imágenes se cargan en modo RGB
        batch_size=32,  # Tamaño de lote para el entrenamiento
        image_size=(256, 256),  # Redimensiona las imágenes a 128x128 píxeles
        shuffle=True,  # Mezcla las imágenes aleatoriamente
        seed=42,  # No se utiliza una semilla específica para la aleatorización
        validation_split=None,  # No se realiza una división de validación aquí
        subset=None,  # No se especifica un subconjunto (train/validation)
        interpolation="bilinear",  # Método de interpolación para redimensionar las imágenes
        follow_links=False,  # No sigue enlaces simbólicos
        crop_to_aspect_ratio=False  # No recorta las imágenes para ajustar la relación de aspecto
    )

    return dataset

In [17]:
# # Carga el dataset de imágenes desde el directorio especificado
# train_images = ""; test_images = ""; valid_images = ""
# 
# print("Cargando datasets desde el directorio…\n")
# for split in splits:
#     data_folder = f'{SPLITTED_PATH}{split}/'
# 
#     # Carga el conjunto de datos desde el directorio especificado
#     # Utiliza la función de TensorFlow para crear un dataset de imágenes
#     match split:
#         case 'train':
#             print(f"Cargando dataset de entrenamiento desde:\n > {data_folder}")
#             train_images = load_from_directory(data_folder)
#         case 'test':
#             print(f"Cargando dataset de test desde:\n > {data_folder}")
#             test_images = load_from_directory(data_folder)
#         case 'valid':
#             print(f"Cargando dataset de validación desde:\n > {data_folder}")
#             valid_images = load_from_directory(data_folder)
#         case _: # En caso de no coincidir con ninguno de los splits
#             print(f"⚠️ El split '{split}' no es reconocido. No se cargará ningún dataset.")
#             continue # Salta al siguiente split
#     print(f"✅ Dataset cargado exitosamente.\n")

In [18]:
# print("Resumen de los datasets cargados:")
# print(f" - Total de imágenes en el dataset de entrenamiento: {len(train_images)}")
# print(f" - Total de imágenes en el dataset de validación: {len(valid_images)}")
# print(f" - Total de imágenes en el dataset de test: {len(test_images)}")
# print(f"Total de imágenes cargadas: {len(train_images) + len(test_images) + len(valid_images)}\n")

In [19]:
# print(f"Clases detectadas:")
# [print(" -",clase) for clase in train_images.class_names]
# print(f"Total de clases: {len(train_images.class_names)}")

----
# Arquitectura del modelo

In [20]:
# from tensorflow.keras import datasets, layers, models
# from tensorflow.keras import Input # For WARNING
# 
# model = models.Sequential()
# 
# # Bloque 1
# model.add(Input(shape=(256, 256, 3))) # WARNING
# model.add(layers.Rescaling(1./255)) # FALTABA MINMAX SCALLING
# model.add(layers.Conv2D(32, (3, 3), activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.1))
# 
# # Bloque 2
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.1))
# 
# # Bloque 3
# model.add(layers.Conv2D(128, (3, 3), activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.1))
# 
# # Bloque 4
# model.add(layers.Conv2D(256, (3, 3), activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.1))
# 
# model.add(layers.Flatten())
# 
# # Capa densa intermedia
# model.add(layers.Dense(512, activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.Dropout(0.5))
# model.add(layers.Dropout(0.25))
# 
# # Capa de salida con 38 neuronas y softmax para multiclase
# model.add(layers.Dense(38, activation='softmax'))
# 
# model.summary()

## Entrenamiento

In [21]:
import time

In [22]:
# from tensorflow.keras.callbacks import ModelCheckpoint

# start_time = time.perf_counter()

# # Definimos el callback para guardar el mejor modelo según la métrica elegida
# checkpoint_callback = ModelCheckpoint(
#     filepath='best_model.keras',   # Se generará una carpeta con este nombre
#     monitor='val_loss',            # Métrica a monitorear ('val_accuracy' es otra opción)
#     save_best_only=True,           # Guarda solo si hay mejora
#     save_weights_only=False,       # Guarda la arquitectura + pesos
#     verbose=1
# )

# # Ajusta el modelo a tu criterio
# with tf.device('/GPU:0'):
#     model.compile(
#         optimizer='adam',
#         loss='categorical_crossentropy', #loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), <-- ERROR
#         metrics=['accuracy']
#     )

# history = model.fit(
#     train_images,
#     validation_data=test_images,
#     epochs=10,
#     callbacks=[checkpoint_callback]  # Incorporamos el callback
# )

# end_time = time.perf_counter()
# elapsed_time = end_time - start_time
# print(f"Tiempo de entrenamiento: {elapsed_time:.2f} segundos")

In [23]:
# print(f"El entrenamiento tomó {elapsed_time:.2f} segundos")

## Guardando resultados

In [24]:
# #Recording History in json & pickle
# import json
# with open('training_hist.json','w') as f:
#   json.dump(history.history,f)
# 
# import pickle
# with open('training_hist.pkl', 'wb') as f:
#     pickle.dump(history.history, f)

In [25]:
# import os
# 
# experiment = 'experimento_1' # Completar número de experimento
# files = ['best_model.keras','training_hist.json','training_hist.pkl']
# destino=f"/content/drive/MyDrive/CV2-PlantVillage/{experiment}/"
# 
# def check_folder(folder):
#     if not os.path.exists(folder):
#         os.makedirs(folder)
#         print(f"Folder '{folder}' created successfully.")
#     else:
#         print(f"Folder '{folder}' already exists.")
# 
# check_folder(destino)
# 
# for file in files:
#     try:
#         origen=f"/content/{file}"
#         !cp -r "$origen" "$destino"
#     except:
#         print(f"Error al copiar el archivo '{file}'")
#     finally:
#         print(f"Archivo '{file}' copiado exitosamente.")

---
# Gráficos

In [26]:
# import matplotlib.pyplot as plt
# 
# epochs = [i for i in range(1,11)]
# plt.plot(epochs,history.history['accuracy'],color='red',label='Training Accuracy')
# plt.plot(epochs,history.history['val_accuracy'],color='blue',label='Validation Accuracy')
# plt.xlabel('No. of Epochs')
# plt.title('Visualization of Accuracy Result')
# plt.legend()
# plt.show()

In [27]:
# #Validation set Accuracy
# model = tf.keras.models.load_model('best_model.keras')
# val_loss, val_acc = model.evaluate(test_images)
# print('Validation accuracy:', val_acc)

In [28]:
!pip install ray[tune] tensorflow




In [39]:
from ray.air import session
import tensorflow as tf
from tensorflow.keras import layers, models
def train_fn(config):

    def load_from_directory(data_folder):
        return tf.keras.preprocessing.image_dataset_from_directory(
            data_folder,
            labels="inferred",
            label_mode="categorical",
            batch_size=32,
            image_size=(256, 256),
            shuffle=True,
            seed=42
        )

  # Carga el dataset de imágenes desde el directorio especificado
    train_images = ""; test_images = ""; valid_images = ""

    print("Cargando datasets desde el directorio…\n")
    for split in splits:
        data_folder = f'{SPLITTED_PATH}{split}/'

        # Carga el conjunto de datos desde el directorio especificado
        # Utiliza la función de TensorFlow para crear un dataset de imágenes
        match split:
            case 'train':
                print(f"Cargando dataset de entrenamiento desde:\n > {data_folder}")
                train_images = load_from_directory(data_folder)
            case 'test':
                print(f"Cargando dataset de test desde:\n > {data_folder}")
                test_images = load_from_directory(data_folder)
            case 'valid':
                print(f"Cargando dataset de validación desde:\n > {data_folder}")
                valid_images = load_from_directory(data_folder)
            case _: # En caso de no coincidir con ninguno de los splits
                print(f"⚠️ El split '{split}' no es reconocido. No se cargará ningún dataset.")
                continue # Salta al siguiente split
        print(f"✅ Dataset cargado exitosamente.\n")

    model = models.Sequential([
        layers.Input(shape=(256, 256, 3)),
        layers.Rescaling(1./255),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(config["dropout_rate"]),
        layers.Flatten(),
        layers.Dense(38, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config["learning_rate"]),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    model.fit(train_images, validation_data=test_images, epochs=3, verbose=0)

    val_loss, val_acc = model.evaluate(test_images, verbose=0)
    session.report({"loss": val_loss, "accuracy": val_acc})



In [40]:
from ray import tune
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Input

def create_model(filters1=32, filters2=64, filters3=128, filters4=256,
                 dropout_rate=0.1, dense_units=512, learning_rate=1e-3):
    model = models.Sequential()

    # Bloque 1
    model.add(Input(shape=(256, 256, 3)))
    model.add(layers.Rescaling(1./255))
    model.add(layers.Conv2D(filters1, (3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))

    # Bloque 2
    model.add(layers.Conv2D(filters2, (3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))

    # Bloque 3
    model.add(layers.Conv2D(filters3, (3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))

    # Bloque 4
    model.add(layers.Conv2D(filters4, (3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))

    model.add(layers.Flatten())

    # Capa densa intermedia
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dropout(0.25))

    # Capa de salida con 38 neuronas y softmax para multiclase
    model.add(layers.Dense(38, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [45]:
search_space = {
    "filters1": tune.choice([32]),
    "filters2": tune.choice([64]),
    "filters3": tune.choice([128]),
    "filters4": tune.choice([256]),
    "dropout_rate": tune.uniform(0.05, 0.15),
    "dense_units": tune.choice([512]),
    "learning_rate": tune.loguniform(1e-6, 5e-4)
}


In [46]:
import ray

from ray.tune import run



analysis = run(
    train_fn,
    config=search_space,
    num_samples=10,  # Number of trials (i.e., different sets of hyperparameters)
    resources_per_trial = {"gpu": 1,"cpu": 16},  # Allocating 2 CPUs and 1 GPU per trial
    max_concurrent_trials=1,
    verbose=1,
    metric="loss",   # 👈 the name of the metric you reported
    mode="min"       # 👈 "min" for minimizing loss, "max" for accuracy etc.
)

print("Best hyperparameters found were: ", analysis.best_config)


0,1
Current time:,2025-04-22 01:23:40
Running for:,00:53:27.61
Memory:,9.7/14.8 GiB

Trial name,status,loc,dense_units,dropout_rate,filters1,filters2,filters3,filters4,learning_rate,iter,total time (s),loss,accuracy
train_fn_19d2d_00000,TERMINATED,192.168.100.111:1379426,512,0.142099,32,64,128,256,2.69557e-05,1,329.585,0.64086,0.820643
train_fn_19d2d_00001,TERMINATED,192.168.100.111:1380611,512,0.0698269,32,64,128,256,0.000218329,1,315.354,0.677385,0.819538
train_fn_19d2d_00002,TERMINATED,192.168.100.111:1381899,512,0.0807287,32,64,128,256,1.71275e-06,1,314.787,1.44194,0.639168
train_fn_19d2d_00003,TERMINATED,192.168.100.111:1383064,512,0.0992878,32,64,128,256,0.000139859,1,316.015,0.550488,0.838321
train_fn_19d2d_00004,TERMINATED,192.168.100.111:1384238,512,0.0643009,32,64,128,256,1.41564e-06,1,319.05,1.50273,0.629224
train_fn_19d2d_00005,TERMINATED,192.168.100.111:1385414,512,0.0709845,32,64,128,256,3.68248e-05,1,315.873,0.584358,0.831507
train_fn_19d2d_00006,TERMINATED,192.168.100.111:1386590,512,0.0783668,32,64,128,256,0.000190948,1,315.611,0.625622,0.827824
train_fn_19d2d_00007,TERMINATED,192.168.100.111:1387749,512,0.0659855,32,64,128,256,3.04429e-06,1,316.74,1.27155,0.66992
train_fn_19d2d_00008,TERMINATED,192.168.100.111:1388908,512,0.050925,32,64,128,256,3.91988e-06,1,319.328,1.14191,0.703895
train_fn_19d2d_00009,TERMINATED,192.168.100.111:1390088,512,0.135315,32,64,128,256,3.53835e-06,1,316.258,1.23111,0.679495


[36m(pid=1379426)[0m 2025-04-22 00:30:14.052309: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=1379426)[0m 2025-04-22 00:30:14.067142: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(pid=1379426)[0m E0000 00:00:1745292614.077391 1379426 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=1379426)[0m E0000 00:00:1745292614.080527 1379426 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=1379426)[0m W0000 00:00:1745292614.08967

[36m(train_fn pid=1379426)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1379426)[0m 
[36m(train_fn pid=1379426)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1379426)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1379426)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1379426)[0m I0000 00:00:1745292617.096159 1379518 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6103 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1379426)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1379426)[0m 
[36m(train_fn pid=1379426)[0m Cargando dataset de test desde:
[36m(train_fn pid=1379426)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1379426)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1379426)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1379426)[0m 


[36m(train_fn pid=1379426)[0m I0000 00:00:1745292618.573728 1379625 service.cc:152] XLA service 0x789a94018310 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1379426)[0m I0000 00:00:1745292618.573852 1379625 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1379426)[0m 2025-04-22 00:30:18.589962: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1379426)[0m I0000 00:00:1745292618.643378 1379625 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1379426)[0m 
[36m(train_fn pid=1379426)[0m 
[36m(train_fn pid=1379426)[0m I0000 00:00:1745292621.099882 1379625 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1379426)[0m 
[36m(t

[36m(train_fn pid=1380611)[0m 
[36m(train_fn pid=1380611)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1380611)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1380611)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1380611)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1380611)[0m I0000 00:00:1745292950.273066 1380697 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1380611)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1380611)[0m 
[36m(train_fn pid=1380611)[0m Cargando dataset de test desde:
[36m(train_fn pid=1380611)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1380611)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1380611)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1380611)[0m 


[36m(train_fn pid=1380611)[0m I0000 00:00:1745292951.975525 1380806 service.cc:152] XLA service 0x7d5410018620 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1380611)[0m I0000 00:00:1745292951.975721 1380806 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1380611)[0m 2025-04-22 00:35:51.993133: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1380611)[0m I0000 00:00:1745292952.049727 1380806 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1380611)[0m 
[36m(train_fn pid=1380611)[0m 
[36m(train_fn pid=1380611)[0m I0000 00:00:1745292954.667128 1380806 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1380611)[0m 
[36m(t

[36m(train_fn pid=1381899)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1381899)[0m 
[36m(train_fn pid=1381899)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1381899)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1381899)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1381899)[0m I0000 00:00:1745293268.691658 1381985 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1381899)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1381899)[0m 
[36m(train_fn pid=1381899)[0m Cargando dataset de test desde:
[36m(train_fn pid=1381899)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1381899)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1381899)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1381899)[0m 


[36m(train_fn pid=1381899)[0m I0000 00:00:1745293270.296493 1382090 service.cc:152] XLA service 0x713af00040f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1381899)[0m I0000 00:00:1745293270.296599 1382090 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1381899)[0m 2025-04-22 00:41:10.313592: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1381899)[0m I0000 00:00:1745293270.372832 1382090 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1381899)[0m 
[36m(train_fn pid=1381899)[0m 
[36m(train_fn pid=1381899)[0m I0000 00:00:1745293273.160578 1382090 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1381899)[0m 
[36m(t

[36m(train_fn pid=1383064)[0m 
[36m(train_fn pid=1383064)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1383064)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1383064)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1383064)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1383064)[0m I0000 00:00:1745293586.071678 1383149 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1383064)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1383064)[0m 
[36m(train_fn pid=1383064)[0m Cargando dataset de test desde:
[36m(train_fn pid=1383064)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1383064)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1383064)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1383064)[0m 


[36m(train_fn pid=1383064)[0m I0000 00:00:1745293587.685947 1383254 service.cc:152] XLA service 0x7043f4002960 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1383064)[0m I0000 00:00:1745293587.686243 1383254 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1383064)[0m 2025-04-22 00:46:27.703688: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1383064)[0m I0000 00:00:1745293587.762352 1383254 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1383064)[0m 
[36m(train_fn pid=1383064)[0m 
[36m(train_fn pid=1383064)[0m I0000 00:00:1745293590.427777 1383254 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1383064)[0m 
[36m(t

[36m(train_fn pid=1384238)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1384238)[0m 
[36m(train_fn pid=1384238)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1384238)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1384238)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1384238)[0m I0000 00:00:1745293904.827694 1384325 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1384238)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1384238)[0m 
[36m(train_fn pid=1384238)[0m Cargando dataset de test desde:
[36m(train_fn pid=1384238)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1384238)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1384238)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1384238)[0m 


[36m(train_fn pid=1384238)[0m I0000 00:00:1745293906.555790 1384430 service.cc:152] XLA service 0x7c9d28005c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1384238)[0m I0000 00:00:1745293906.555882 1384430 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1384238)[0m 2025-04-22 00:51:46.572748: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1384238)[0m I0000 00:00:1745293906.630878 1384430 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1384238)[0m 
[36m(train_fn pid=1384238)[0m 
[36m(train_fn pid=1384238)[0m I0000 00:00:1745293909.351891 1384430 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1384238)[0m 
[36m(t

[36m(train_fn pid=1385414)[0m 
[36m(train_fn pid=1385414)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1385414)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1385414)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1385414)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1385414)[0m I0000 00:00:1745294226.992019 1385500 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1385414)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1385414)[0m 
[36m(train_fn pid=1385414)[0m Cargando dataset de test desde:
[36m(train_fn pid=1385414)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1385414)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1385414)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1385414)[0m 


[36m(train_fn pid=1385414)[0m I0000 00:00:1745294228.601355 1385604 service.cc:152] XLA service 0x701d94004710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1385414)[0m I0000 00:00:1745294228.601516 1385604 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1385414)[0m 2025-04-22 00:57:08.619262: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1385414)[0m I0000 00:00:1745294228.678226 1385604 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1385414)[0m 
[36m(train_fn pid=1385414)[0m 
[36m(train_fn pid=1385414)[0m I0000 00:00:1745294231.431009 1385604 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1385414)[0m 
[36m(t

[36m(train_fn pid=1386590)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1386590)[0m 
[36m(train_fn pid=1386590)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1386590)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1386590)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1386590)[0m I0000 00:00:1745294546.333564 1386677 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1386590)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1386590)[0m 
[36m(train_fn pid=1386590)[0m Cargando dataset de test desde:
[36m(train_fn pid=1386590)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1386590)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1386590)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1386590)[0m 


[36m(train_fn pid=1386590)[0m I0000 00:00:1745294547.962623 1386784 service.cc:152] XLA service 0x7c9470016a20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1386590)[0m I0000 00:00:1745294547.962726 1386784 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1386590)[0m 2025-04-22 01:02:27.979128: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1386590)[0m I0000 00:00:1745294548.036492 1386784 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1386590)[0m 
[36m(train_fn pid=1386590)[0m 
[36m(train_fn pid=1386590)[0m I0000 00:00:1745294550.782174 1386784 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1386590)[0m 
[36m(t

[36m(train_fn pid=1387749)[0m 
[36m(train_fn pid=1387749)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1387749)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1387749)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1387749)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1387749)[0m I0000 00:00:1745294864.372191 1387835 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1387749)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1387749)[0m 
[36m(train_fn pid=1387749)[0m Cargando dataset de test desde:
[36m(train_fn pid=1387749)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1387749)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1387749)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1387749)[0m 


[36m(train_fn pid=1387749)[0m I0000 00:00:1745294865.968007 1387942 service.cc:152] XLA service 0x7f759c019bc0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1387749)[0m I0000 00:00:1745294865.968078 1387942 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1387749)[0m 2025-04-22 01:07:45.983507: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1387749)[0m I0000 00:00:1745294866.040491 1387942 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1387749)[0m 
[36m(train_fn pid=1387749)[0m 
[36m(train_fn pid=1387749)[0m I0000 00:00:1745294868.968256 1387942 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1387749)[0m 
[36m(t

[36m(train_fn pid=1388908)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1388908)[0m 
[36m(train_fn pid=1388908)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1388908)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1388908)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1388908)[0m I0000 00:00:1745295184.471174 1388994 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1388908)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1388908)[0m 
[36m(train_fn pid=1388908)[0m Cargando dataset de test desde:
[36m(train_fn pid=1388908)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1388908)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1388908)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1388908)[0m 


[36m(train_fn pid=1388908)[0m I0000 00:00:1745295186.030584 1389104 service.cc:152] XLA service 0x7a5cb8002190 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1388908)[0m I0000 00:00:1745295186.030665 1389104 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1388908)[0m 2025-04-22 01:13:06.046348: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1388908)[0m I0000 00:00:1745295186.102104 1389104 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1388908)[0m 
[36m(train_fn pid=1388908)[0m 
[36m(train_fn pid=1388908)[0m I0000 00:00:1745295188.730526 1389104 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1388908)[0m 
[36m(t

[36m(train_fn pid=1390088)[0m 
[36m(train_fn pid=1390088)[0m Cargando datasets desde el directorio…
[36m(train_fn pid=1390088)[0m Cargando dataset de entrenamiento desde:
[36m(train_fn pid=1390088)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/train/
[36m(train_fn pid=1390088)[0m Found 109718 files belonging to 38 classes.


[36m(train_fn pid=1390088)[0m I0000 00:00:1745295506.318976 1390176 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6133 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


[36m(train_fn pid=1390088)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1390088)[0m 
[36m(train_fn pid=1390088)[0m Cargando dataset de test desde:
[36m(train_fn pid=1390088)[0m  > /home/juan/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/splitted/test/
[36m(train_fn pid=1390088)[0m Found 10861 files belonging to 38 classes.
[36m(train_fn pid=1390088)[0m ✅ Dataset cargado exitosamente.
[36m(train_fn pid=1390088)[0m 


[36m(train_fn pid=1390088)[0m I0000 00:00:1745295507.892078 1390283 service.cc:152] XLA service 0x7c95780151f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
[36m(train_fn pid=1390088)[0m I0000 00:00:1745295507.892163 1390283 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
[36m(train_fn pid=1390088)[0m 2025-04-22 01:18:27.910618: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
[36m(train_fn pid=1390088)[0m I0000 00:00:1745295507.971646 1390283 cuda_dnn.cc:529] Loaded cuDNN version 90300
[36m(train_fn pid=1390088)[0m 
[36m(train_fn pid=1390088)[0m 
[36m(train_fn pid=1390088)[0m I0000 00:00:1745295510.785572 1390283 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
[36m(train_fn pid=1390088)[0m 
[36m(t

Best hyperparameters found were:  {'filters1': 32, 'filters2': 64, 'filters3': 128, 'filters4': 256, 'dropout_rate': 0.09928777465197672, 'dense_units': 512, 'learning_rate': 0.00013985928027592968}


In [None]:
learning_rate_best_config = 0.00013985928027592968
dropout_rate_best_config = 0.09928777465197672