In [1]:
# !pip install --upgrade transformers tensorflow


In [2]:
# !pip uninstall keras


In [3]:
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import ImageReadMode, read_image
from torch.utils.tensorboard import SummaryWriter

from transformers import ViTForImageClassification, ViTImageProcessor

In [4]:
# Pick the compute device (GPU when one is visible, CPU otherwise) and report it
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ready else torch.device("cpu")
print(f"Utilizando dispositivo: {device}")

if cuda_ready:
    # Details of the first visible GPU
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Nombre de la GPU: {gpu_name}")
    print(f"Memoria total de la GPU: {gpu_total_bytes / 1e9:.2f} GB")

Utilizando dispositivo: cuda
Nombre de la GPU: NVIDIA GeForce GTX 1650 Ti
Memoria total de la GPU: 4.29 GB


In [5]:
# Run configuration: everything a reader might want to tune lives here
SEED = 123                 # shared seed for splitting, resampling and torch
EPOCHS = 50                # upper bound on training epochs
BATCH_SIZE = 16            # raised for GPU training
IMAGE_SIZE = (224, 224)    # size images are resized to before entering the model
LEARNING_RATE = 1e-5       # initial Adam learning rate

In [12]:
# Root folder of the image collection (one sub-directory per class)
images_dir = '../arcgis-survey-images-new-last/arcgis-survey-images-new-last'

# Seed torch for reproducibility: the CPU generator always, and every GPU
# generator when CUDA is present
torch.manual_seed(SEED)
cuda_present = torch.cuda.is_available()
if cuda_present:
    torch.cuda.manual_seed_all(SEED)

In [13]:
# Every sub-directory of images_dir is a class; sorting the names gives each
# class a stable numeric label (its position in the sorted list)
class_names = sorted(
    entry
    for entry in os.listdir(images_dir)
    if os.path.isdir(os.path.join(images_dir, entry))
)
class_to_label = dict(zip(class_names, range(len(class_names))))

print(f"Clases encontradas: {class_names}")

Clases encontradas: ['Chinche salivosa', 'Clororis', 'Hoja sana', 'Roya naranja', 'Roya purpura']


In [8]:
# Collect every image path together with its numeric class label.
#
# os.listdir() returns entries in arbitrary (filesystem-dependent) order, so the
# listing is sorted: a stable row order is what lets the seeded train/validation
# split and the seeded resampling further down produce the same result on every
# machine and every run.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
# NOTE(review): confirm the image decoder used by the dataset can read every
# extension accepted here (in particular .bmp).

image_paths = []
labels = []

for class_name in class_names:
    class_dir = os.path.join(images_dir, class_name)
    for fname in sorted(os.listdir(class_dir)):
        # Case-insensitive extension filter; anything else in the folder is skipped
        if fname.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(class_to_label[class_name])


In [9]:
# Sample table: one row per image, holding its path and its numeric class label
data_df = pd.DataFrame(dict(filepath=image_paths, label=labels))


In [None]:
# Number of images per class label before any balancing
class_counts = data_df['label'].value_counts()
print(f"Distribución original de clases:\n{class_counts}")

In [11]:
# Hold out 20% of the rows for validation; stratifying on the label keeps the
# class proportions the same in both parts, and the seed makes the split repeatable
split_options = {
    'test_size': 0.2,
    'stratify': data_df['label'],
    'random_state': SEED,
}
train_df, val_df = train_test_split(data_df, **split_options)

In [None]:
# Balance the training set by upsampling every minority class (sampling with
# replacement) to the size of the largest class. Only the training split is
# touched; the validation split keeps its original distribution.
max_count = train_df['label'].value_counts().max()

df_upsampled = []
for label_value in train_df['label'].unique():
    class_rows = train_df[train_df['label'] == label_value]
    if len(class_rows) != max_count:
        class_rows = resample(class_rows, replace=True, n_samples=max_count, random_state=SEED)
    df_upsampled.append(class_rows)

# Concatenate the per-class pieces, then shuffle so classes are interleaved
train_df_balanced = (
    pd.concat(df_upsampled)
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

print("Distribución de clases después del upsampling:")
print(train_df_balanced['label'].value_counts())

In [13]:
# Custom map-style Dataset backed by a DataFrame of file paths and labels
class ImageDataset(Dataset):
    """Dataset that decodes images listed in a DataFrame.

    Args:
        dataframe: table with one row per sample. It must provide the columns
            ``filepath`` (path handed to ``read_image``) and ``label``.
        transform: optional callable applied to the decoded image tensor
            before it is returned.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Single positional lookup; both fields are read from the same row
        row = self.dataframe.iloc[idx]

        # Always decode to 3 channels. Without an explicit mode, grayscale files
        # come back with 1 channel and RGBA files with 4, which breaks any
        # 3-channel normalization and the 3-channel model input.
        image = read_image(row['filepath'], mode=ImageReadMode.RGB)

        if self.transform is not None:
            image = self.transform(image)

        return image, row['label']

In [14]:
# Preprocessing applied to every decoded image tensor:
# tensor -> PIL image -> resize to IMAGE_SIZE -> float tensor -> normalize each
# channel with mean 0.5 and std 0.5
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
]
transform = transforms.Compose(preprocessing_steps)


In [15]:
# Datasets: the balanced frame for training, the untouched hold-out for validation
train_dataset = ImageDataset(train_df_balanced, transform=transform)
val_dataset = ImageDataset(val_df, transform=transform)

# Loader settings shared by both splits (pin_memory is set for GPU training);
# only the training loader shuffles
loader_options = dict(batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)


In [None]:
# Load the pretrained ViT checkpoint with a classification head sized for our
# classes (ignore_mismatched_sizes lets the head differ from the checkpoint's),
# then place the model on the selected device
vit_model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=len(class_names),
    ignore_mismatched_sizes=True,
).to(device)

In [None]:
# Gradient scaler for Automatic Mixed Precision. AMP scaling only applies to
# CUDA training, so the scaler is created disabled (a pass-through) whenever the
# selected device is not a GPU.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vit_model.parameters(), lr=LEARNING_RATE)

# Learning-rate schedule: multiply the LR by 0.2 when the monitored value
# (stepped with the validation loss) stops decreasing, never going below 1e-6.
# The `verbose` argument is deprecated in recent PyTorch releases and is
# therefore not passed; the current LR can be read from
# optimizer.param_groups[0]['lr'].
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, min_lr=1e-6)

# TensorBoard writer with one timestamped run directory per execution
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir)


In [18]:
def train_epoch(model, loader, criterion, optimizer, device, scaler):
    """Run one training pass over ``loader``.

    Args:
        model: module whose forward output exposes a ``.logits`` tensor.
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss callable taking ``(logits, labels)``.
        optimizer: optimizer that updates ``model``'s parameters.
        device: ``torch.device`` or device string the batches are moved to.
        scaler: AMP ``GradScaler``; may be ``None`` or disabled, in which case
            a plain full-precision step is performed.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually processed.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    device = torch.device(device)
    # Mixed precision is only used on CUDA with an enabled scaler: the CUDA
    # GradScaler rejects CPU tensors, so the AMP path must be skipped elsewhere.
    use_amp = device.type == "cuda" and scaler is not None and scaler.is_enabled()

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # tqdm already reports per-batch progress, so no extra line is printed per batch
    for batch_idx, (inputs, labels) in enumerate(tqdm(loader, desc="Training")):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(inputs).logits
            loss = criterion(outputs, labels)

        if use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        batch_size = labels.size(0)
        # criterion returns a batch mean; weight it by batch size so the epoch
        # average stays correct when the last batch is smaller
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

        # Report the running loss every 10 batches
        if (batch_idx + 1) % 10 == 0:
            print(f"Batch {batch_idx+1}: Loss = {loss.item():.4f}")

    if total == 0:
        raise ValueError("train_epoch received a loader that produced no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [19]:
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating it.

    Args:
        model: module whose forward output exposes a ``.logits`` tensor.
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss callable taking ``(logits, labels)``.
        device: ``torch.device`` or device string the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually processed.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    device = torch.device(device)
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    # No gradients are needed for evaluation; disabling them avoids building an
    # autograd graph for every batch, which only costs memory and time.
    with torch.no_grad():
        # tqdm already reports per-batch progress, so no extra line is printed per batch
        for batch_idx, (inputs, labels) in enumerate(tqdm(loader, desc="Validating")):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).logits
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight the batch-mean loss by batch size for a correct epoch average
            running_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

            # Report the running loss every 10 batches
            if (batch_idx + 1) % 10 == 0:
                print(f"Validation Batch {batch_idx+1}: Loss = {loss.item():.4f}")

    if total == 0:
        raise ValueError("validate received a loader that produced no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [None]:
# Force CPU execution.
# The model was already placed on the device chosen at start-up, so it has to be
# moved as well: the training/validation loops send every batch to `device`, and
# weights and inputs on different devices raise a runtime error.
# Module.to() relocates the existing parameters, so the optimizer built earlier
# keeps pointing at them.
device = torch.device("cpu")
vit_model = vit_model.to(device)

# The CUDA GradScaler rejects non-CUDA tensors; replace it with a disabled
# (pass-through) scaler so the training step still works on CPU.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

print(f"Utilizando dispositivo: {device}")


In [None]:
# Main training loop
#
# Early stopping: the plateau scheduler clamps the learning rate at 1e-6, so a
# strict "lr < 1e-6" test can never fire. Training instead stops once the LR
# sits at that floor AND the validation loss has gone STALL_PATIENCE epochs
# without improving (the point where the scheduler has nothing left to try).
MIN_LR = 1e-6          # must match the scheduler's min_lr
STALL_PATIENCE = 3     # non-improving epochs tolerated at the LR floor

best_val_loss = float('inf')
epochs_since_improvement = 0

try:
    for epoch in range(EPOCHS):
        train_loss, train_acc = train_epoch(vit_model, train_loader, criterion, optimizer, device, scaler)
        val_loss, val_acc = validate(vit_model, val_loader, criterion, device)

        # Log metrics to TensorBoard
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)
        writer.add_scalar('Accuracy/train', train_acc, epoch)
        writer.add_scalar('Accuracy/val', val_acc, epoch)

        # Print progress
        print(f'Epoch {epoch+1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Keep the best model seen so far (lowest validation loss)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_since_improvement = 0
            torch.save(vit_model.state_dict(), 'best_model_vit.pth')
            print(f'Modelo guardado con pérdida de validación: {val_loss:.4f}')
        else:
            epochs_since_improvement += 1

        # Let the scheduler react to the validation loss and report any change
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        current_lr = optimizer.param_groups[0]['lr']
        if current_lr != lr_before:
            print(f"Tasa de aprendizaje reducida de {lr_before:.2e} a {current_lr:.2e}")
            # Give the new learning rate a fresh chance before judging a stall
            epochs_since_improvement = 0

        # Early stopping
        if current_lr <= MIN_LR and epochs_since_improvement >= STALL_PATIENCE:
            print("La tasa de aprendizaje alcanzó su mínimo y la pérdida de validación dejó de mejorar. Deteniendo el entrenamiento.")
            break

        # Show GPU memory usage
        if torch.cuda.is_available():
            print(f"Uso de memoria GPU: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
finally:
    # Flush and close the TensorBoard writer even if training is interrupted
    writer.close()

print("Entrenamiento completado.")

In [None]:
# Evaluate the model on the validation set
vit_model.eval()
# Send inputs to wherever the model's weights actually live, so this cell works
# regardless of what the global `device` currently points to
model_device = next(vit_model.parameters()).device
y_pred = []
y_true = []
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = vit_model(inputs.to(model_device)).logits
        predicted = outputs.argmax(dim=1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# Pin the label set to every known class so the matrix rows/columns and the
# report lines always line up with class_names, even if some class is never
# predicted or is absent from the validation data
label_ids = list(range(len(class_names)))

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=label_ids)

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, xticklabels=class_names, yticklabels=class_names, annot=True, fmt='g', cmap='Blues')
plt.xlabel('Predicción')
plt.ylabel('Etiqueta Verdadera')
plt.title('Matriz de Confusión')
plt.show()

# Classification report
print("Reporte de clasificación:")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))