# **Aumentación**

**Geométricas suaves**

* HorizontalFlip, VerticalFlip

* Rotate o RandomRotate90

* Pequeños RandomResizedCrop o ShiftScaleRotate con parámetros moderados.

**Fotométricas**

* RandomBrightnessContrast

* HueSaturationValue (ligero)

**Efectos suaves**

* GaussianBlur suave

* GaussNoise muy ligero

**Celda 1 – Instalación (si hace falta) e imports**

In [1]:
# Remove every non-hidden entry under /kaggle/working (destructive: outputs of earlier runs are lost).
rm -rf /kaggle/working/*

In [None]:
# Quietly (-q) install albumentations pinned to release 1.4.0.
!pip install -q albumentations==1.4.0

In [None]:
import os
from glob import glob
import cv2
import albumentations as A
import pandas as pd
from datetime import datetime
import shutil

**Celda 2 – Definir rutas y verificar dataset original**

In [None]:
# Kaggle input dataset (already contains the 10 class folders). The class
# folders sit directly under this directory, so no "images_original" level
# is appended.
DATA_ROOT = "/kaggle/input/data-cucumber-images-jpg/data_cucumber_images_jpg"
ORIG_DIR = DATA_ROOT

# Destination for the augmented images (inside /kaggle/working).
AUG_ROOT = "/kaggle/working/cucumber_images_augmented"
os.makedirs(AUG_ROOT, exist_ok=True)

print("Directorio de originales:", ORIG_DIR)
print("Directorio para aumentadas:", AUG_ROOT)

# Report how many original images each class folder holds; plain files
# lying next to the class folders are ignored.
for class_name in sorted(os.listdir(ORIG_DIR)):
    class_path = os.path.join(ORIG_DIR, class_name)
    if os.path.isdir(class_path):
        image_paths = [
            found
            for ext in ("*.jpg", "*.jpeg", "*.png")
            for found in glob(os.path.join(class_path, ext))
        ]
        print(f"Carpeta '{class_name}': {len(image_paths)} imágenes")

**Celda 3 – Definir la pipeline de aumentación**

In [None]:
# Augmentation pipeline, assembled from three groups that are applied in
# this order: mild geometric, photometric, mild effects.
geometric_transforms = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.10,
        rotate_limit=15,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.5,
    ),
]

photometric_transforms = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
    A.HueSaturationValue(
        hue_shift_limit=10,
        sat_shift_limit=20,
        val_shift_limit=10,
        p=0.5,
    ),
]

effect_transforms = [
    A.GaussianBlur(blur_limit=(3, 5), p=0.3),
    A.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
]

transform = A.Compose(
    geometric_transforms + photometric_transforms + effect_transforms
)

**Celda 4 – Generar imágenes aumentadas y registrar en un CSV**

In [None]:
# Number of augmented variants written for each original image.
N_AUG_PER_IMAGE = 3

# One dict per augmented file that was actually written to disk; this list
# is the raw material for the traceability CSV.
log_rows = []

for class_name in sorted(os.listdir(ORIG_DIR)):
    class_orig_path = os.path.join(ORIG_DIR, class_name)
    if not os.path.isdir(class_orig_path):
        continue

    # Mirror the class folder under AUG_ROOT.
    class_aug_path = os.path.join(AUG_ROOT, class_name)
    os.makedirs(class_aug_path, exist_ok=True)

    # Collect every image of the class. glob returns paths in arbitrary
    # order, so sort them to make processing and log order reproducible.
    image_paths = []
    for ext in ("*.jpg", "*.jpeg", "*.png"):
        image_paths.extend(glob(os.path.join(class_orig_path, ext)))
    image_paths.sort()

    print(f"Procesando clase '{class_name}' con {len(image_paths)} imágenes...")

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            print(f"  [AVISO] No se pudo leer: {img_path}")
            continue

        # OpenCV loads BGR; convert to RGB before handing it to albumentations.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        base_name = os.path.splitext(os.path.basename(img_path))[0]

        for i in range(N_AUG_PER_IMAGE):
            # Each call draws a fresh random variant of the same original.
            augmented = transform(image=img)["image"]
            augmented_bgr = cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR)

            # Name and destination of the augmented file.
            new_name = f"{base_name}_aug{i+1}.jpg"
            new_path = os.path.join(class_aug_path, new_name)

            # cv2.imwrite reports failure through its return value; only
            # log files that really exist so the CSV stays trustworthy.
            if not cv2.imwrite(new_path, augmented_bgr):
                print(f"  [AVISO] No se pudo escribir: {new_path}")
                continue

            log_rows.append({
                "original_filename": os.path.basename(img_path),
                "augmented_filename": new_name,
                "class": class_name,
                "original_rel_path": os.path.relpath(img_path, DATA_ROOT),
                "augmented_rel_path": os.path.relpath(new_path, "/kaggle/working"),
                "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })

print("Aumentación finalizada. Imágenes generadas:", len(log_rows))

**Celda 5 – Crear el CSV augmentation_log.csv**

In [None]:
# Persist the traceability log as a CSV under /kaggle/working.
csv_path = "/kaggle/working/augmentation_log.csv"
df_log = pd.DataFrame(data=log_rows)
df_log.to_csv(path_or_buf=csv_path, index=False)

print("CSV de trazabilidad guardado en:", csv_path)
# Last expression of the cell: the notebook renders the first rows.
df_log.head()

**Celda 6 – Crear un ZIP con todas las imágenes aumentadas**

In [None]:
# Archive the whole augmented tree. make_archive appends ".zip" to the base
# path, so the file lands at /kaggle/working/cucumber_images_augmented_zip.zip
zip_base_path = "/kaggle/working/cucumber_images_augmented_zip"
shutil.make_archive(base_name=zip_base_path, format="zip", root_dir=AUG_ROOT)

print("ZIP creado en:", f"{zip_base_path}.zip")

In [2]:
import os
from glob import glob
import cv2
import albumentations as A
import pandas as pd
from datetime import datetime
import shutil

DATA_ROOT = "/kaggle/input/data-cucumber-images-jpg/data_cucumber_images_jpg"
ORIG_DIR = DATA_ROOT

# Class folders still missing from the augmented set; edit this list as needed.
carpetas_faltantes = [
    "vegetative_growth_smartphone"
]

# The chunk name (and therefore its folder and ZIP) is derived from the
# folder list, so different selections never overwrite each other.
chunk_name = "faltantes_" + "_".join(carpetas_faltantes)
chunk_dir = f"/kaggle/working/{chunk_name}"
zip_path = chunk_dir + ".zip"

os.makedirs(chunk_dir, exist_ok=True)

# Augmentation pipeline.
# NOTE(review): the recorded output of this cell shows a warning that quotes
# the GaussNoise line below; presumably the installed albumentations release
# deprecates or ignores `var_limit`. Confirm which release runs here and
# either pin it or switch to the noise argument that release documents.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.05, scale_limit=0.10, rotate_limit=15,
        border_mode=cv2.BORDER_REFLECT_101, p=0.5
    ),
    A.RandomBrightnessContrast(0.2, 0.2, p=0.7),
    A.HueSaturationValue(10, 20, 10, p=0.5),
    A.GaussianBlur((3, 5), p=0.3),
    A.GaussNoise(var_limit=(0.001, 0.01), p=0.3)  # value the author marked as corrected
])

N_AUG_PER_IMAGE = 3
csv_path = "/kaggle/working/augmentation_log.csv"

# One dict per augmented file that was actually written to disk.
log_rows = []

for class_name in carpetas_faltantes:
    orig_class_path = os.path.join(ORIG_DIR, class_name)
    out_class_path = os.path.join(chunk_dir, class_name)
    os.makedirs(out_class_path, exist_ok=True)

    # glob returns paths in arbitrary order; sort for reproducible output.
    image_paths = sorted(
        glob(orig_class_path + "/*.jpg")
        + glob(orig_class_path + "/*.jpeg")
        + glob(orig_class_path + "/*.png")
    )

    print(f"Procesando {class_name}: {len(image_paths)} imágenes")

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # Report unreadable files instead of dropping them silently.
            print(f"  [AVISO] No se pudo leer: {img_path}")
            continue
        # OpenCV loads BGR; the pipeline is fed RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # splitext removes only the final extension, so "a.v1.jpg" and
        # "a.v2.jpg" keep distinct stems and cannot overwrite each other.
        base = os.path.splitext(os.path.basename(img_path))[0]

        for i in range(N_AUG_PER_IMAGE):
            aug = transform(image=img)["image"]
            aug_bgr = cv2.cvtColor(aug, cv2.COLOR_RGB2BGR)
            new_name = f"{base}_aug{i+1}.jpg"
            new_path = os.path.join(out_class_path, new_name)

            # cv2.imwrite reports failure through its return value; only
            # log files that really exist.
            if not cv2.imwrite(new_path, aug_bgr):
                print(f"  [AVISO] No se pudo escribir: {new_path}")
                continue

            log_rows.append({
                "original_filename": os.path.basename(img_path),
                "augmented_filename": new_name,
                "class": class_name,
                "chunk": chunk_name,
                "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            })

# Save the log. An existing augmentation_log.csv may have been written with a
# different column set (the full-run log has rel-path columns and no "chunk"),
# so a header-less append would misalign columns. Merge on the union of
# columns and rewrite the file instead. Nothing is written when no rows were
# produced, which keeps an empty run from corrupting the log.
df = pd.DataFrame(log_rows)
if not df.empty:
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        try:
            # dtype=str / keep_default_na=False round-trip the old rows
            # verbatim (no "001" -> 1, no "" -> NaN conversions).
            previous = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
        except pd.errors.EmptyDataError:
            previous = pd.DataFrame()
        if not previous.empty:
            df = pd.concat([previous, df], ignore_index=True, sort=False)
    df.to_csv(csv_path, index=False)

# Compress the chunk; make_archive appends ".zip", which yields zip_path.
shutil.make_archive(chunk_dir, "zip", chunk_dir)

print("ZIP creado:", zip_path)

  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(0.001, 0.01), p=0.3)  # ← corregido


Procesando vegetative_growth_smartphone: 283 imágenes
ZIP creado: /kaggle/working/faltantes_vegetative_growth_smartphone.zip


In [3]:
import os
from glob import glob
import cv2
import albumentations as A
import shutil

DATA_ROOT = "/kaggle/input/data-cucumber-images-jpg/data_cucumber_images_jpg"
ORIG_DIR = DATA_ROOT
class_name = "vegetative_growth_smartphone"

orig_path = os.path.join(ORIG_DIR, class_name)
out_dir = "/kaggle/working/vegetative_growth_smartphone_augmented"

os.makedirs(out_dir, exist_ok=True)

# Augmentation pipeline.
# NOTE(review): the recorded output of this cell shows a warning that quotes
# the GaussNoise line below; presumably the installed albumentations release
# deprecates or ignores `var_limit`. Confirm which release runs here and
# either pin it or switch to the noise argument that release documents.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.05, scale_limit=0.10, rotate_limit=15,
        border_mode=cv2.BORDER_REFLECT_101, p=0.5
    ),
    A.RandomBrightnessContrast(0.2, 0.2, p=0.7),
    A.HueSaturationValue(10, 20, 10, p=0.5),
    A.GaussianBlur((3, 5), p=0.3),
    A.GaussNoise(var_limit=(0.001, 0.01), p=0.3),
])

N_AUG_PER_IMAGE = 3

# Load the originals (sorted for a reproducible processing order).
imgs = sorted(
    glob(orig_path + "/*.jpg") +
    glob(orig_path + "/*.jpeg") +
    glob(orig_path + "/*.png")
)

print("Imágenes originales encontradas:", len(imgs))

# Generate the augmented images; `count` tracks files actually written.
count = 0
for img_path in imgs:
    img = cv2.imread(img_path)
    if img is None:
        # Report unreadable files instead of dropping them silently.
        print(f"  [AVISO] No se pudo leer: {img_path}")
        continue

    # OpenCV loads BGR; the pipeline is fed RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # splitext removes only the final extension, so "a.v1.jpg" and
    # "a.v2.jpg" keep distinct stems and cannot overwrite each other.
    base = os.path.splitext(os.path.basename(img_path))[0]

    for i in range(N_AUG_PER_IMAGE):
        aug = transform(image=img_rgb)["image"]
        aug_bgr = cv2.cvtColor(aug, cv2.COLOR_RGB2BGR)

        new_name = f"{base}_aug{i+1}.jpg"
        new_path = os.path.join(out_dir, new_name)

        # cv2.imwrite reports failure through its return value; count only
        # the files that were really written.
        if cv2.imwrite(new_path, aug_bgr):
            count += 1
        else:
            print(f"  [AVISO] No se pudo escribir: {new_path}")

print("Total aumentadas generadas:", count)

  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(0.001, 0.01), p=0.3),


Imágenes originales encontradas: 283
Total aumentadas generadas: 849


In [4]:
import os
from glob import glob
import shutil

import zipfile


def zip_in_batches(paths, zip_prefix, batch_size):
    """Pack *paths* into consecutive ZIP files of at most *batch_size* entries.

    Archives are named ``{zip_prefix}_001.zip``, ``{zip_prefix}_002.zip``, ...
    and each file is stored at the archive root under its base name.

    Files are written straight into the archive. No staging directory is
    created, so nothing is copied twice, nothing is left behind if a write
    fails, and leftovers from an aborted earlier run cannot end up inside
    an archive.

    Args:
        paths: Sequence of file paths, already in the desired order.
        zip_prefix: Path prefix (without part number or extension).
        batch_size: Maximum number of files per archive; must be >= 1.

    Returns:
        List with the paths of the archives created, in creation order.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    created = []
    for start in range(0, len(paths), batch_size):
        part_idx = start // batch_size + 1
        zip_file = f"{zip_prefix}_{part_idx:03d}.zip"
        # ZIP_DEFLATED: compress entries rather than zipfile's default of
        # storing them uncompressed.
        with zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED) as archive:
            for path in paths[start:start + batch_size]:
                archive.write(path, arcname=os.path.basename(path))
        print(f"ZIP creado: {zip_file}")
        created.append(zip_file)
    return created


class_folder = "/kaggle/working/vegetative_growth_smartphone_augmented"

all_imgs = sorted(glob(class_folder + "/*"))

print("Total aumentadas:", len(all_imgs))

IMAGES_PER_ZIP = 100

zip_in_batches(all_imgs, "/kaggle/working/vg_smartphone_part", IMAGES_PER_ZIP)

print("Proceso completado.")

Total aumentadas: 849
ZIP creado: /kaggle/working/vg_smartphone_part_001.zip
ZIP creado: /kaggle/working/vg_smartphone_part_002.zip
ZIP creado: /kaggle/working/vg_smartphone_part_003.zip
ZIP creado: /kaggle/working/vg_smartphone_part_004.zip
ZIP creado: /kaggle/working/vg_smartphone_part_005.zip
ZIP creado: /kaggle/working/vg_smartphone_part_006.zip
ZIP creado: /kaggle/working/vg_smartphone_part_007.zip
ZIP creado: /kaggle/working/vg_smartphone_part_008.zip
ZIP creado: /kaggle/working/vg_smartphone_part_009.zip
Proceso completado.
