In [11]:
import tensorflow as tf
import shutil
import zipfile
import os
from sklearn.model_selection import train_test_split


## Descomprimir archivo zip

In [12]:
# Source archive and the folder that receives its contents
archivo_zip, carpeta_destino = 'Imagenes_proyecto.zip', 'data'

# Unpack every member of the archive; the context manager closes the file
# handle even if extraction fails part-way through.
with zipfile.ZipFile(archivo_zip) as zip_ref:
    zip_ref.extractall(path=carpeta_destino)

print(f'Archivos extraídos en la carpeta: {carpeta_destino}')

Archivos extraídos en la carpeta: data


## Obtener las rutas de los archivos de imágenes y organizarlos en conjuntos de entrenamiento, validación y prueba, clasificándolos por categoría (etiqueta).

In [13]:
def get_image_paths(directory_path):
    """Recursively collect the image files found under ``directory_path``.

    A file counts as an image when its name ends in ``.png``, ``.jpg`` or
    ``.jpeg`` (compared case-insensitively).

    Args:
        directory_path: Root directory to walk.

    Returns:
        list[str]: One path per image (walk root joined with the file name),
        sorted lexicographically. The list is empty when nothing matches.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(directory_path)
        for file in files
        if file.lower().endswith(image_extensions)
    ]
    # os.walk yields entries in a filesystem-dependent order; sorting makes
    # any seeded split performed on this list reproducible across machines.
    image_paths.sort()
    return image_paths

def organize_data(image_paths, label, train_dir, val_dir, test_dir, test_size=0.2, random_state=42):
    """Split one class of images into train/val/test and copy them to disk.

    The paths are first split into a training part and a held-out part of
    relative size ``test_size``; the held-out part is then split in half
    into validation and test. Each subset is copied into
    ``<split_dir>/<label>/``, creating the folders when needed.

    Args:
        image_paths: Paths of the images that belong to ``label``.
        label: Class name, used as the sub-folder name inside each split dir.
        train_dir: Root folder of the training split.
        val_dir: Root folder of the validation split.
        test_dir: Root folder of the test split.
        test_size: Fraction held out from training (validation + test).
        random_state: Seed forwarded to both ``train_test_split`` calls.

    Returns:
        tuple[int, int, int]: Number of train, validation and test images.

    Raises:
        ValueError: If ``image_paths`` is empty, or (raised by
            ``train_test_split``) if there are too few paths to split.
    """
    if len(image_paths) == 0:
        # Fail early with an actionable message instead of the generic
        # error train_test_split raises for an empty input.
        raise ValueError(
            f"No images were found for label '{label}'; check the source directory."
        )

    # Split the data into training (80% by default) and a held-out remainder
    train_paths, temp_paths = train_test_split(image_paths, test_size=test_size, random_state=random_state)

    # Split the remainder evenly into validation (50%) and test (50%)
    val_paths, test_paths = train_test_split(temp_paths, test_size=0.5, random_state=random_state)

    def copy_images(paths, dest_dir):
        """Copy ``paths`` into ``dest_dir`` without clobbering same-named files."""
        os.makedirs(dest_dir, exist_ok=True)
        # Names already written during this call. Sources gathered from
        # several sub-folders may share a basename; without this check the
        # later file would silently overwrite the earlier one and the
        # returned counts would overstate what is on disk.
        used_names = set()
        for path in paths:
            name = os.path.basename(path)
            if os.path.normcase(name) in used_names:
                stem, ext = os.path.splitext(name)
                suffix = 1
                while os.path.normcase(f"{stem}_{suffix}{ext}") in used_names:
                    suffix += 1
                name = f"{stem}_{suffix}{ext}"
            used_names.add(os.path.normcase(name))
            shutil.copy(path, os.path.join(dest_dir, name))

    train_label_dir = os.path.join(train_dir, label)
    val_label_dir = os.path.join(val_dir, label)
    test_label_dir = os.path.join(test_dir, label)

    copy_images(train_paths, train_label_dir)
    copy_images(val_paths, val_label_dir)
    copy_images(test_paths, test_label_dir)

    return len(train_paths), len(val_paths), len(test_paths)


In [14]:

# Folder where the train, val and test splits will be stored
working_dir = 'dataset_final'

# Report an existing folder as-is; create it only when it is missing
if os.path.exists(working_dir):
    print(f'La carpeta "{working_dir}" ya existe.')
else:
    os.makedirs(working_dir)
    print(f'Carpeta "{working_dir}" creada con éxito.')

Carpeta "dataset_final" creada con éxito.


In [15]:
# Directories that hold the extracted images, one per class.
# Built with os.path.join rather than hard-coded '\\' separators so the
# paths resolve on every operating system, not only on Windows.
data_root = os.path.join('data', 'Data')
glioma_tumor_dir = os.path.join(data_root, 'glioma_tumor')
meningioma_tumor_dir = os.path.join(data_root, 'meningioma_tumor')
normal_dir = os.path.join(data_root, 'normal')
pituitary_tumor_dir = os.path.join(data_root, 'pituitary_tumor')

# Destination folders for the train, val and test splits
train_dir = os.path.join(working_dir, 'train')
val_dir = os.path.join(working_dir, 'val')
test_dir = os.path.join(working_dir, 'test')


In [16]:
# Función get_image_paths
glioma_image_paths = get_image_paths(glioma_tumor_dir)
meningioma_image_paths = get_image_paths(meningioma_tumor_dir)
normal_image_paths = get_image_paths(normal_dir)
pituitary_image_paths = get_image_paths(pituitary_tumor_dir)

# Función organize_data
num_train_glioma, num_val_glioma, num_test_glioma = organize_data(glioma_image_paths, 'glioma_tumor', train_dir, val_dir, test_dir)
num_train_meningioma, num_val_meningioma, num_test_meningioma = organize_data(meningioma_image_paths, 'meningioma_tumor', train_dir, val_dir, test_dir)
num_train_normal, num_val_normal, num_test_normal = organize_data(normal_image_paths, 'normal', train_dir, val_dir, test_dir)
num_train_pituitary, num_val_pituitary, num_test_pituitary = organize_data(pituitary_image_paths, 'pituitary_tumor', train_dir, val_dir, test_dir)


In [18]:
# Imprimir resultados
print("Número de imágenes de entrenamiento:")
print(f"Glioma: {num_train_glioma}")
print(f"Meningioma: {num_train_meningioma}")
print(f"Normal: {num_train_normal}")
print(f"Pituitary: {num_train_pituitary}")

print("\nNúmero de imágenes de validación:")
print(f"Glioma: {num_val_glioma}")
print(f"Meningioma: {num_val_meningioma}")
print(f"Normal: {num_val_normal}")
print(f"Pituitary: {num_val_pituitary}")

print("\nNúmero de imágenes de prueba:")
print(f"Glioma: {num_test_glioma}")
print(f"Meningioma: {num_test_meningioma}")
print(f"Normal: {num_test_normal}")
print(f"Pituitary: {num_test_pituitary}")

Número de imágenes de entrenamiento:
Glioma: 720
Meningioma: 730
Normal: 350
Pituitary: 675

Número de imágenes de validación:
Glioma: 90
Meningioma: 91
Normal: 44
Pituitary: 84

Número de imágenes de prueba:
Glioma: 91
Meningioma: 92
Normal: 44
Pituitary: 85
