In [2]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import pandas as pd

def process_images(image_dir, target_size=(150, 150)):
    """Load labelled images from a directory, split them and save the splits.

    Images are read with ``ImageDataGenerator.flow_from_directory`` (pixel
    values rescaled by 1/255, batches of 32, one-hot ``categorical`` labels),
    collected into memory, split 80/20 with ``train_test_split`` and written
    to ``X_train.npy``, ``X_test.npy``, ``y_train.npy`` and ``y_test.npy`` in
    the current working directory.

    Args:
        image_dir: Directory passed to ``flow_from_directory``.
        target_size: ``(height, width)`` every image is resized to.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        ValueError: If no image could be loaded from ``image_dir``.
    """
    # Image generator: rescales pixel values by 1/255
    datagen = ImageDataGenerator(rescale=1./255)

    # Load the images
    image_data = datagen.flow_from_directory(
        image_dir,
        target_size=target_size,
        batch_size=32,
        class_mode='categorical'
    )

    # Ceiling division so the final, smaller batch is read as well; plain
    # floor division would silently drop up to batch_size - 1 images.
    batch_size = image_data.batch_size
    n_batches = (image_data.samples + batch_size - 1) // batch_size

    # Derive the expected per-image shape from target_size instead of
    # hard-coding (150, 150, 3), so non-default sizes are not filtered out.
    # The trailing 3 matches the 3-channel shape the original check required.
    expected_shape = tuple(target_size) + (3,)

    # Collect features (X) and labels (y)
    X = []
    y = []
    for _ in range(n_batches):
        images, labels = next(image_data)
        for image, label in zip(images, labels):
            if image.shape == expected_shape:  # keep only correctly sized images
                X.append(image)
                y.append(label)

    if not X:
        raise ValueError(
            f"No images with shape {expected_shape} were loaded from {image_dir!r}"
        )

    # Convert to numpy arrays
    X = np.array(X)
    y = np.array(y)

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Save the processed data
    np.save('X_train.npy', X_train)
    np.save('X_test.npy', X_test)
    np.save('y_train.npy', y_train)
    np.save('y_test.npy', y_test)

    return X_train, X_test, y_train, y_test

# Process the images and save the resulting train/test splits
image_dir = 'flowers'
X_train, X_test, y_train, y_test = process_images(image_dir)

Found 4317 images belonging to 5 classes.
