In [1]:
from google.colab import files
# Prompt for a file upload through Colab's `files` helper and keep the
# returned value in `uploaded` for later cells.
uploaded = files.upload()


Saving archive (3).zip to archive (3).zip


In [3]:
import zipfile
import os

# Default archive name expected by this notebook.
zip_path = "alzheimer.zip"
if not os.path.exists(zip_path):
    # The upload cell may have stored the archive under a different name
    # (its recorded output shows "archive (3).zip"). Fall back to the first
    # uploaded file, if the upload cell was run in this session.
    # NOTE(review): assumes `uploaded` is keyed by saved filename — confirm.
    uploaded_names = list(globals().get("uploaded") or [])
    if uploaded_names:
        zip_path = uploaded_names[0]

# Extract the whole archive under /content/categorias.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("/content/categorias")


Aumentación de datos para la clase CN (sin modificar el brillo)

In [4]:
import os
import cv2
import numpy as np
from PIL import Image
import albumentations as A
from concurrent.futures import ThreadPoolExecutor

# Source folder of the class to augment, and where augmented files are written.
input_dir = "categorias/CN"
output_dir = "categorias/CN"  # Or switch to a different folder
# Desired number of images in the class once augmentation is finished.
target_total = 8800

# Image files currently present (extension check is case-insensitive).
existing_images = sorted([f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))])
n_existing = len(existing_images)
n_to_generate = target_total - n_existing

# Gaussian noise step. Recent albumentations releases warn on `var_limit`
# (see the warning captured in this cell's output) and expect `std_range`,
# a standard deviation expressed as a fraction of the maximum pixel value.
# A variance of 10..50 on uint8 images is a std of sqrt(10)..sqrt(50) pixel
# levels, i.e. sqrt(var) / 255 as a fraction.
# NOTE(review): confirm the conversion against the installed albumentations version.
try:
    gauss_noise = A.GaussNoise(
        std_range=(10.0 ** 0.5 / 255.0, 50.0 ** 0.5 / 255.0),
        p=0.3,
    )
except TypeError:
    # Older releases do not accept `std_range`; keep the original argument.
    gauss_noise = A.GaussNoise(var_limit=(10.0, 50.0), p=0.3)

# Augmentation pipeline: random flip, small rotation, optional noise.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    gauss_noise
])

# Función que realiza la aumentación y guarda la imagen
def augment_and_save_image(image_path, idx):
    """Augment one image with the module-level ``transform`` and save it.

    Args:
        image_path: Path of the source image to read.
        idx: Counter embedded in the output file name so that several
            augmentations of the same source do not overwrite each other.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.

    The result is written to ``output_dir`` as ``aug_{idx}_{original name}``.
    """
    # cv2.imread signals failure by returning None instead of raising;
    # check here so the error names the offending file.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"No se pudo leer la imagen: {image_path}")

    # OpenCV loads BGR; convert to RGB because the file is saved through PIL.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Apply the augmentation pipeline.
    augmented = transform(image=image)
    aug_img = augmented["image"]

    # Save the augmented image next to the configured output folder.
    base_name = os.path.basename(image_path)
    new_filename = f"aug_{idx}_{base_name}"
    output_path = os.path.join(output_dir, new_filename)
    Image.fromarray(aug_img).save(output_path)

# Función para realizar la aumentación en paralelo
def process_images_in_parallel():
    """Generate ``n_to_generate`` augmented images using a thread pool.

    Each task picks a random file from ``existing_images`` and hands it to
    ``augment_and_save_image`` together with a running index.
    """
    with ThreadPoolExecutor() as pool:
        # Queue one augmentation task per image that still has to be created.
        pending = [
            pool.submit(
                augment_and_save_image,
                os.path.join(input_dir, np.random.choice(existing_images)),
                task_idx,
            )
            for task_idx in range(n_to_generate)
        ]

        # Block on every task in submission order; result() re-raises any
        # exception that happened inside a worker.
        for task in pending:
            task.result()

    print("¡Aumentación completada en paralelo!")

# Run the augmentation.
process_images_in_parallel()

  check_for_updates()
  A.GaussNoise(var_limit=(10.0, 50.0), p=0.3)


¡Aumentación completada en paralelo!


Preprocesar los datos de las categorías

In [8]:
import os
import cv2
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed
from multiprocessing import freeze_support

# Folder that contains the images to preprocess
image_folder = 'categorias/LMCI'

# Collect the image paths. The extension check is case-insensitive (matching
# the other cells of this notebook) so files such as "scan.PNG" are included,
# and the listing is sorted so the order does not depend on the filesystem.
image_paths = [
    os.path.join(image_folder, f)
    for f in sorted(os.listdir(image_folder))
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
]

# Función para procesar una imagen
def process_image(image_path):
    """Load an image, convert it to grayscale and resize it to 128x128.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The grayscale image resized to 128x128.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    # cv2.imread returns None on failure instead of raising; fail with a
    # message that identifies the file.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"No se pudo leer la imagen: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (128, 128))
    return resized

def main():
    """Preprocess every path in ``image_paths`` using a process pool.

    Failures of individual images are reported and skipped.

    Returns:
        list: The preprocessed images in completion order (not input order).
        Nothing is written to disk; use the returned list if the processed
        images are needed afterwards.
    """
    processed_images = []

    with ProcessPoolExecutor() as executor:
        # Map each future back to its path so failures can be attributed.
        future_to_path = {executor.submit(process_image, path): path for path in image_paths}
        for future in as_completed(future_to_path):
            try:
                processed_images.append(future.result())
            except Exception as exc:
                print(f"Error procesando {future_to_path[future]}: {exc}")

    print(f"Procesadas {len(processed_images)} imágenes.")
    # Previously the results were dropped when the function returned.
    return processed_images

# Entry point: run the preprocessing when this code executes as `__main__`.
if __name__ == '__main__':
    freeze_support()  # Needed on Windows when the script is going to be frozen
    main()

Procesadas 8960 imágenes.


Renombrar las imágenes

In [9]:
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import shutil

# Root folder that holds the category folders
BASE_DIR = Path("categorias")

# Categories to process
CATEGORIES = ["AD", "CN", "EMCI", "LMCI"]

# Valid image formats (compared against the lower-cased file suffix)
VALID_EXTS = [".jpg", ".jpeg", ".png"]

def rename_images(category_path, category_name):
    """Rename the images of a folder to ``{category_name}_{NNNNN}{ext}``.

    Images are numbered from 1 following the sorted order of their current
    names; the extension is lower-cased. Files whose suffix is not in
    ``VALID_EXTS`` are left untouched.

    Args:
        category_path: ``pathlib.Path`` of the folder to process.
        category_name: Prefix used for the new file names.

    The rename runs in two phases. Renaming straight to the final name can
    overwrite a file that is still waiting for its own rename (for example
    when the folder already contains ``{category_name}_00001.png`` and a new
    file sorts before it), which silently loses images.
    """
    import uuid  # local import: only needed to build collision-free temp names

    image_files = [
        f for f in sorted(category_path.iterdir())
        if f.is_file() and f.suffix.lower() in VALID_EXTS
    ]

    # Phase 1: move every image to a unique temporary name.
    token = uuid.uuid4().hex
    staged = []
    for idx, image_path in enumerate(image_files, 1):
        suffix = image_path.suffix.lower()
        tmp_path = category_path / f".rename_{token}_{idx:05d}{suffix}"
        os.rename(image_path, tmp_path)
        staged.append((tmp_path, category_path / f"{category_name}_{idx:05d}{suffix}"))

    # Phase 2: every original name is free now, so final names cannot clash
    # with a pending source file.
    for tmp_path, new_path in staged:
        os.rename(tmp_path, new_path)

    print(f"[{category_name}] Renombradas {len(image_files)} imágenes.")

def main():
    """Rename the images of every existing category folder in parallel.

    One ``rename_images`` task is submitted per category in ``CATEGORIES``
    whose folder exists under ``BASE_DIR``.

    Raises:
        Exception: Whatever a worker raised. Without inspecting the futures a
        failed rename would pass unnoticed.
    """
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = []
        for category in CATEGORIES:
            category_path = BASE_DIR / category
            if category_path.exists() and category_path.is_dir():
                futures.append(executor.submit(rename_images, category_path, category))

        # result() re-raises any exception that occurred inside the worker.
        for future in futures:
            future.result()

# Entry point: run the renaming when this code executes as `__main__`.
if __name__ == "__main__":
    main()

[AD] Renombradas 8960 imágenes.
[CN] Renombradas 8800 imágenes.
[LMCI] Renombradas 8960 imágenes.
[EMCI] Renombradas 9600 imágenes.


División de los datos

In [10]:
import os
import random
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# Input path
INPUT_DIR = Path("categorias")
# Output path
OUTPUT_DIR = Path("dataset")
# Split ratios
TEST_RATIO = 0.2
VAL_RATIO = 0.2  # Taken from the remaining 80% (20% of 80% = 16%)

# Categories
CATEGORIES = ["AD", "CN", "EMCI", "LMCI"]

def split_and_copy_images():
    """Split each category into train/val/test subsets and copy the files.

    For every category in ``CATEGORIES`` the images under ``INPUT_DIR`` are
    split off into a test set (``TEST_RATIO``), and the remainder is split
    into train and validation (``VAL_RATIO`` of the remainder). Files are
    copied with ``copy_images``.

    Categories with fewer than 3 images are skipped, because the two
    consecutive splits would otherwise leave a subset empty and
    ``train_test_split`` would raise.

    NOTE(review): the split is random per file. If a category contains
    augmented variants of the same source image, variants may end up in
    different subsets — confirm whether that is acceptable for evaluation.
    """
    valid_suffixes = {".jpg", ".jpeg", ".png"}

    for category in CATEGORIES:
        print(f"Procesando categoría: {category}")

        category_path = INPUT_DIR / category
        # glob() yields entries in arbitrary filesystem order; sorting makes
        # the fixed random_state actually reproduce the same split.
        images = sorted(
            img for img in category_path.glob("*.*")
            if img.suffix.lower() in valid_suffixes
        )

        if len(images) < 3:
            print(f"  Categoría {category} omitida: se necesitan al menos 3 imágenes (hay {len(images)}).")
            continue

        # Split into test and the rest
        trainval_imgs, test_imgs = train_test_split(images, test_size=TEST_RATIO, random_state=42)
        # Split the rest into train and val
        train_imgs, val_imgs = train_test_split(trainval_imgs, test_size=VAL_RATIO, random_state=42)

        # Copy the images into their subset folders
        copy_images(train_imgs, category, "train")
        copy_images(val_imgs, category, "val")
        copy_images(test_imgs, category, "test")

    print("División completada.")

def copy_images(images, category, subset):
    """Copy ``images`` into ``OUTPUT_DIR / subset / category``.

    The destination folder is created when missing and every file keeps its
    original name.
    """
    target_dir = OUTPUT_DIR / subset / category
    target_dir.mkdir(parents=True, exist_ok=True)
    for source in images:
        shutil.copy2(source, target_dir / source.name)

# Entry point: run the split when this code executes as `__main__`.
if __name__ == "__main__":
    split_and_copy_images()

Procesando categoría: AD
Procesando categoría: CN
Procesando categoría: EMCI
Procesando categoría: LMCI
División completada.


In [15]:
import shutil

# Pack the "dataset" folder into a zip archive named "datasetCorrecto".
shutil.make_archive("datasetCorrecto", 'zip', "dataset")


'/content/datasetCorrecto.zip'

In [16]:
from google.colab import files

# Hand the archive to Colab's `files` helper for download.
files.download("datasetCorrecto.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>