In [2]:
# !pip install autogluon
# !pip install albumentations
# !pip install pandas scikit-learn matplotlib seaborn

In [None]:
!pip uninstall autogluon autogluon.vision autogluon.core autogluon.mxnet -y


In [None]:
!pip install --upgrade pip setuptools


In [None]:
!pip install autogluon[vision]


In [None]:
!pip install autogluon


In [3]:
!pip install autogluon.vision --ignore-requires-python


Collecting autogluon.vision
  Downloading autogluon.vision-0.6.2-py3-none-any.whl.metadata (12 kB)
Collecting numpy<1.24,>=1.21 (from autogluon.vision)
  Downloading numpy-1.23.5-cp311-cp311-win_amd64.whl.metadata (2.3 kB)
Collecting pandas!=1.4.0,<1.6,>=1.2.5 (from autogluon.vision)
  Using cached pandas-1.5.3-cp311-cp311-win_amd64.whl.metadata (12 kB)
Collecting gluoncv<0.10.6,>=0.10.5 (from autogluon.vision)
  Using cached gluoncv-0.10.5.post0-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting Pillow<=9.4.0,>=9.3.0 (from autogluon.vision)
  Downloading Pillow-9.4.0-cp311-cp311-win_amd64.whl.metadata (9.4 kB)
Collecting timm<0.7.0,>=0.5.4 (from autogluon.vision)
  Downloading timm-0.6.13-py3-none-any.whl.metadata (38 kB)
Collecting autogluon.core==0.6.2 (from autogluon.vision)
  Downloading autogluon.core-0.6.2-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.multimodal==0.6.2 (from autogluon.vision)
  Downloading autogluon.multimodal-0.6.2-py3-none-any.whl.metadata (14 kB)
Coll

  error: subprocess-exited-with-error
  
  × Preparing metadata (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [118 lines of output]
      
        `numpy.distutils` is deprecated since NumPy 1.23.0, as a result
        of the deprecation of `distutils` itself. It will be removed for
        Python >= 3.12. For older Python versions it will remain present.
        It is recommended to use `setuptools < 60.0` for those Python versions.
        For more details, see:
          https://numpy.org/devdocs/reference/distutils_status_migration.html
      
      
        from numpy.distutils.core import setup
      Running from SciPy source directory.
        obj = getattr(themodule, elem)
      Cythonizing sources
      Running scipy\linalg\_generate_pyx.py
      Running scipy\special\_generate_pyx.py
      Running scipy\stats\_generate_pyx.py
      Processing scipy\cluster\_hierarchy.pyx
      Processing scipy\cluster\_optimal_leaf_ordering.pyx
      Processing scipy\clust

In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.vision import ImagePredictor, ImageDataset
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import shutil
import random

ModuleNotFoundError: No module named 'autogluon.vision'

In [None]:
# -------------------------------
# 1. Preparación de los Datos
# -------------------------------

# Directorio principal de imágenes
images_dir = 'arcgis-survey-images'

# Obtener las clases a partir de los nombres de los subdirectorios
class_names = sorted([d for d in os.listdir(images_dir) if os.path.isdir(os.path.join(images_dir, d))])
print(f"Clases encontradas: {class_names}")

# Recopilar rutas de imágenes y etiquetas
data = []
for class_label in class_names:
    class_dir = os.path.join(images_dir, class_label)
    for img_name in os.listdir(class_dir):
        if img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            img_path = os.path.join(class_dir, img_name)
            data.append({'image': img_path, 'label': class_label})

# Crear un DataFrame
df = pd.DataFrame(data)
print(f"Total de imágenes: {len(df)}")

# Dividir en conjuntos de entrenamiento y validación
train_df, valid_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=123)
print(f"Entrenamiento: {len(train_df)} imágenes")
print(f"Validación: {len(valid_df)} imágenes")

# Guardar los DataFrames en archivos CSV (opcional)
train_df.to_csv('train_data.csv', index=False)
valid_df.to_csv('valid_data.csv', index=False)

In [None]:
# -------------------------------
# 2. Preprocesamiento Avanzado (Opcional)
# -------------------------------

# Definir las transformaciones de preprocesamiento
def preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = np.array(image)
    
    transform = A.Compose([
        A.Resize(128, 128),
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.3),
        A.GaussianBlur(blur_limit=(3,7), p=0.2),
        A.Canny(p=0.1),
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    
    transformed = transform(image=image)
    return transformed['image']

# Aplicar las transformaciones y guardar las imágenes preprocesadas en un directorio temporal
preprocessed_dir = 'preprocessed_images'
if os.path.exists(preprocessed_dir):
    shutil.rmtree(preprocessed_dir)
os.makedirs(preprocessed_dir)

def transformed_image_to_pil(tensor_image):
    # Convertir el tensor de vuelta a PIL Image para guardarlo
    image = tensor_image.permute(1, 2, 0).cpu().numpy()
    image = (image * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406])  # Desnormalizar
    image = np.clip(image, 0, 1)
    image = (image * 255).astype(np.uint8)
    return Image.fromarray(image)

def apply_preprocessing(df, split):
    processed_data = []
    for idx, row in df.iterrows():
        img_path = row['image']
        label = row['label']
        processed_image = preprocess_image(img_path)
        
        # Guardar la imagen preprocesada
        new_img_name = f"{split}_{idx}.png"
        new_img_path = os.path.join(preprocessed_dir, new_img_name)
        transformed_image = transformed_image_to_pil(processed_image)
        transformed_image.save(new_img_path)
        
        processed_data.append({'image': new_img_path, 'label': label})
        
    return pd.DataFrame(processed_data)

# Nota: Aplicar preprocesamiento puede ser intensivo en tiempo y recursos.
# Puedes comentar esta sección si prefieres dejar que AutoGluon maneje el preprocesamiento.

# train_processed_df = apply_preprocessing(train_df, 'train')
# valid_processed_df = apply_preprocessing(valid_df, 'valid')

# Para este ejemplo, usaremos las rutas originales sin preprocesamiento adicional.
train_processed_df = train_df.copy()
valid_processed_df = valid_df.copy()

In [None]:
# -------------------------------
# 3. Entrenamiento con AutoGluon
# -------------------------------

# Cargar los datos en el formato requerido por AutoGluon
train_data = ImageDataset.from_df(train_processed_df, label='label')
valid_data = ImageDataset.from_df(valid_processed_df, label='label')

# Crear el predictor de AutoGluon
predictor = ImagePredictor()

# Entrenar el modelo
predictor.fit(
    train_data=train_data,
    tuning_data=valid_data,  # Datos de validación para ajustar hiperparámetros
    time_limit=3600,  # Tiempo máximo en segundos (por ejemplo, 1 hora)
    presets='best_quality',  # Ajusta la calidad del entrenamiento
    hyperparameters={
        'model': 'resnet50',  # Puedes elegir diferentes arquitecturas
    },
    verbosity=2  # Nivel de detalle de los logs
)

In [None]:
# -------------------------------
# 4. Evaluación del Modelo
# -------------------------------

# Evaluar el modelo en el conjunto de validación
test_performance = predictor.evaluate(valid_data)
print("Resultados de evaluación en el conjunto de validación:")
print(test_performance)

# Obtener predicciones en el conjunto de validación
y_true = valid_processed_df['label'].values
y_pred = predictor.predict(valid_data)

# Generar el reporte de clasificación
print("Reporte de clasificación:")
print(classification_report(y_true, y_pred, target_names=class_names))

# Crear la matriz de confusión
conf_matrix = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
plt.xlabel('Predicción')
plt.ylabel('Etiqueta')
plt.title('Matriz de Confusión')
plt.show()

In [None]:
# -------------------------------
# 5. Visualización de Predicciones
# -------------------------------

# Mostrar algunas imágenes con sus predicciones
def show_predictions(predictor, df, class_names, num_images=5):
    samples = df.sample(n=num_images, random_state=42)
    for idx, row in samples.iterrows():
        img_path = row['image']
        true_label = row['label']
        # Cargar la imagen en formato compatible
        image = Image.open(img_path).convert('RGB')
        pred_label = predictor.predict(ImageDataset.from_df(pd.DataFrame([row]), label='label')).iloc[0]
        
        plt.figure(figsize=(4,4))
        plt.imshow(image)
        plt.title(f"Verdadero: {true_label}\nPredicción: {pred_label}")
        plt.axis('off')
        plt.show()

# Mostrar 5 predicciones aleatorias
show_predictions(predictor, valid_processed_df, class_names, num_images=5)

In [None]:
# -------------------------------
# 6. Guardar y Cargar el Modelo
# -------------------------------

# Guardar el modelo
predictor.save("autogluon_plague_classifier")

# Cargar el modelo guardado
loaded_predictor = ImagePredictor.load("autogluon_plague_classifier")

# Evaluar nuevamente para verificar
loaded_predictor.evaluate(valid_data)