In [None]:
# --- 1. CONFIGURACIÓN INICIAL ---


# Instalación de librerías (si es necesario)
!pip install --quiet openpyxl
!pip install --quiet xlsxwriter


# Montar Google Drive (Sigue las instrucciones que aparecen al ejecutar)
from google.colab import drive
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, recall_score, f1_score, precision_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


drive.mount('/content/drive')


# --- 2. PATH CONFIGURATION (ADJUSTED) ---

# Root of the mounted Drive, where the 'MODELOS' folder lives.
DRIVE_ROOT = '/content/drive/MyDrive/'

# Folder holding one sub-folder per model (e.g. DENSENET_201, INCEPTION, ...).
MODELS_DIR = os.path.join(DRIVE_ROOT, 'MODELOS')

# TEST folder (the one containing the 12 species sub-folders).
# Assumed to be the location of the original images; ADJUST it if not.
TEST_DATA_PATH = os.path.join(DRIVE_ROOT, 'plantas_dataset_v2/dataset_split_anidado/test')

# Folder where the final evaluation results are written.
SAVE_RESULTS_PATH = os.path.join(DRIVE_ROOT, 'Evaluation_Results')

# Create the results folder if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(SAVE_RESULTS_PATH, exist_ok=True)


# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")


# General evaluation parameters.
num_classes = 12
batch_size = 8
feature_extract = False  # Must match the value used during training


# Models to evaluate: display name paired with the exact folder name that
# holds the trained weights.
MODELS_TO_EVAL = [
    {"name": display_name, "folder": weights_folder}
    for display_name, weights_folder in (
        ("VGG11-BN", "VGG11_BN"),
        ("DenseNet-201", "DENSENET_201"),
        ("ResNet-101", "RESNET_101"),
        ("MobileNetV2", "MOBILENETV2"),
        ("inception", "INCEPTION"),
    )
]


# Echo the configured model and test-data locations (nothing is read yet).
print(
    f"\nModelos listos para evaluación en: {MODELS_DIR}",
    f"Datos de prueba cargados desde: {TEST_DATA_PATH}",
    sep="\n",
)
# --- 3. FUNCIONES DE TU CÓDIGO ORIGINAL (Asegúrate de que sean idénticas) ---


# Función Auxiliar para Fine-Tuning
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when feature extracting.

    Args:
        model: Object exposing ``parameters()``; each yielded parameter gets
            its ``requires_grad`` attribute set to ``False``.
        feature_extracting: When falsy the model is left untouched.

    Returns:
        None. The model is modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False


# Función Clave: Inicialización de Modelos con Tamaños Óptimos
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a supported torchvision classifier with a new output layer.

    For every supported architecture the network is constructed, optionally
    frozen through ``set_parameter_requires_grad``, and its final linear
    layer(s) are replaced by fresh ``nn.Linear`` layers with ``num_classes``
    outputs.

    Args:
        model_name: One of "VGG11-BN", "DenseNet-201", "ResNet-101",
            "MobileNetV2" or "inception".
        num_classes: Number of outputs of the replacement head.
        feature_extract: Forwarded to ``set_parameter_requires_grad``.
        use_pretrained: Forwarded as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        ``(model, input_size)`` where ``input_size`` is 299 for Inception V3
        and 224 for the other architectures, or ``(None, 0)`` after printing
        a message when ``model_name`` is not recognised.
    """
    from torchvision import models

    def _fresh_head(old_layer):
        # New linear layer with the same fan-in as the layer being replaced.
        return nn.Linear(old_layer.in_features, num_classes)

    if model_name == "VGG11-BN":
        # VGG11 with batch norm (input 224)
        net = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(net, feature_extract)
        net.classifier[6] = _fresh_head(net.classifier[6])
        return net, 224

    if model_name == "DenseNet-201":
        # DenseNet-201 (input 224)
        net = models.densenet201(pretrained=use_pretrained)
        set_parameter_requires_grad(net, feature_extract)
        net.classifier = _fresh_head(net.classifier)
        return net, 224

    if model_name == "ResNet-101":
        # ResNet-101 (input 224)
        net = models.resnet101(pretrained=use_pretrained)
        set_parameter_requires_grad(net, feature_extract)
        net.fc = _fresh_head(net.fc)
        return net, 224

    if model_name == "MobileNetV2":
        # MobileNetV2 (input 224)
        net = models.mobilenet_v2(pretrained=use_pretrained)
        set_parameter_requires_grad(net, feature_extract)
        net.classifier[1] = _fresh_head(net.classifier[1])
        return net, 224

    if model_name == "inception":
        # Inception V3 (input 299): both the auxiliary and the main
        # classifier are replaced, auxiliary head first.
        net = models.inception_v3(pretrained=use_pretrained, aux_logits=True)
        set_parameter_requires_grad(net, feature_extract)
        net.AuxLogits.fc = _fresh_head(net.AuxLogits.fc)
        net.fc = _fresh_head(net.fc)
        return net, 299

    print("Nombre de modelo inválido.")
    return None, 0
# --- 4. FUNCIÓN DE EVALUACIÓN Y GENERACIÓN DE GRÁFICOS ---


def evaluate_model_and_plot(model, dataloader, model_name, class_names):
    """Evaluate a model on a dataloader, compute metrics and save plots.

    Runs inference without gradients, then computes overall accuracy and
    micro F1, a per-class precision/recall/F1 table and a confusion matrix.
    Two figures (confusion-matrix heatmap and per-class precision bar chart)
    are saved as PNG files under ``SAVE_RESULTS_PATH`` and displayed.

    Args:
        model: Network to evaluate; it is switched to eval mode. Inputs are
            moved to the module-level ``device`` before the forward pass, so
            the model is expected to already live there.
        dataloader: Iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute with a length. Labels are presumably
            integer indices into ``class_names`` -- verify against caller.
        model_name: Display name used in messages, titles and file names
            ("-" is replaced by "_" in file names).
        class_names: Class names, used as report targets and axis labels.

    Returns:
        Tuple ``(acc, f1_micro, species_df, cm_df)``: overall accuracy, micro
        F1, a DataFrame with one row per class, and the confusion matrix as a
        DataFrame with a leading 'Model' column.
    """
    model.eval()
    all_labels = []
    all_preds = []

    # Explicit label ids keep the report and the confusion matrix aligned
    # with class_names even when a class never shows up in targets or
    # predictions (otherwise sklearn infers a shorter label set and fails).
    label_ids = list(range(len(class_names)))

    print(f"Evaluando {model_name} en {len(dataloader.dataset)} imágenes...")

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            if isinstance(outputs, tuple):  # Inception may return a tuple
                outputs = outputs[0]

            _, preds = torch.max(outputs, 1)

            # Labels are only collected, so they never need to visit the device.
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    # --- Overall metrics ---
    acc = accuracy_score(all_labels, all_preds)
    f1_micro = f1_score(all_labels, all_preds, average='micro', zero_division=0)
    print(f"\n--- Resultados Generales de {model_name} en TEST ---")
    print(f"Accuracy General: {acc:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")

    # --- Per-class metrics (DataFrame) ---
    report = classification_report(
        all_labels,
        all_preds,
        labels=label_ids,
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )
    species_data = [
        {
            'Model': model_name,
            'Species': class_name,
            'Precision': report[class_name]['precision'],
            'Recall': report[class_name]['recall'],
            'F1_Score': report[class_name]['f1-score'],
            'Support': report[class_name]['support'],
        }
        for class_name in class_names
    ]
    species_df = pd.DataFrame(species_data)

    # --- Confusion matrix (DataFrame) ---
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
    cm_df.insert(0, 'Model', model_name)

    # --- Plots ---
    file_tag = model_name.replace("-", "_")

    # 1. Confusion matrix heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Matriz de Confusión: {model_name}\nAcc: {acc:.4f}')
    plt.ylabel('Etiqueta Real (True Label)')
    plt.xlabel('Predicción (Predicted Label)')
    plt.savefig(os.path.join(SAVE_RESULTS_PATH, f'CM_{file_tag}.png'), bbox_inches='tight')
    plt.show()
    plt.close()  # release the figure so repeated calls do not accumulate them

    # 2. Per-class precision bar chart
    plt.figure(figsize=(12, 6))
    sns.barplot(x='Species', y='Precision', data=species_df, palette='viridis')
    plt.title(f'Precisión por Especie (TEST): {model_name}')
    plt.ylim(0, 1.05)
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Precisión')
    plt.xlabel('Especie')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig(os.path.join(SAVE_RESULTS_PATH, f'Precision_by_Species_{file_tag}.png'), bbox_inches='tight')
    plt.show()
    plt.close()

    return acc, f1_micro, species_df, cm_df


# --- 5. MAIN EVALUATION LOOP ---


# Per-model results, accumulated so the export step can concatenate them.
ALL_EVAL_SPECIES_RESULTS = []
ALL_EVAL_CMS = []
ALL_EVAL_GENERAL_RESULTS = []


for config in MODELS_TO_EVAL:
    model_name = config['name']
    folder_name = config['folder']

    print(f"\n{'='*50}")
    print(f" INICIANDO EVALUACIÓN DE: {model_name}")
    print(f"{'='*50}")

    # 1. Build the architecture and get its input size.
    # use_pretrained=False only provides the structure; the trained weights
    # are loaded from the checkpoint below.
    model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=False)
    if model_ft is None:
        continue

    # 2. Load the weights of the best model.
    weights_path = os.path.join(MODELS_DIR, folder_name, 'best.pt')

    if not os.path.exists(weights_path):
        print(f"ADVERTENCIA: No se encontraron pesos en {weights_path}. Saltando {model_name}.")
        continue

    try:
        checkpoint = torch.load(weights_path, map_location=device)
        # Accept both a training checkpoint that wraps the weights under
        # 'model_state_dict' and a file that is itself a bare state_dict.
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint
        model_ft.load_state_dict(state_dict)
        model_ft.to(device)
    except Exception as e:
        print(f"ERROR al cargar el estado del modelo {model_name}: {e}. Saltando.")
        continue

    # 3. Test-set transforms.
    # Standard ImageNet mean/std; the blue-channel std is 0.225 (the previous
    # 0.255 was a typo). NOTE(review): if training used a different
    # normalisation, use those exact values here instead.
    test_transforms = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # 4. Test DataLoader (rebuilt per model because the input size differs).
    try:
        test_dataset = datasets.ImageFolder(TEST_DATA_PATH, test_transforms)
        class_names = test_dataset.classes
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
        print(f"Clases detectadas: {class_names}")
    except Exception as e:
        print(f"ERROR al cargar el conjunto de datos de prueba desde {TEST_DATA_PATH}: {e}. Saltando.")
        continue

    # 5. Evaluation
    acc, f1_micro, species_df, cm_df = evaluate_model_and_plot(model_ft, test_dataloader, model_name, class_names)

    # 6. Accumulate results
    ALL_EVAL_GENERAL_RESULTS.append(pd.DataFrame([{
        'Model': model_name,
        'Test_Acc': acc,
        'Test_F1_Micro': f1_micro
    }]))
    ALL_EVAL_SPECIES_RESULTS.append(species_df)
    ALL_EVAL_CMS.append(cm_df)




# --- 6. EXPORT FINAL RESULTS ---


if not ALL_EVAL_GENERAL_RESULTS:
    # Every model was skipped (missing weights, load errors, ...).
    # pd.concat raises ValueError on an empty list, so report and stop here
    # instead of crashing or claiming success.
    print("\nADVERTENCIA: Ningún modelo pudo evaluarse; no se exportaron resultados.")
else:
    # Overall results (accuracy and F1)
    final_eval_general_df = pd.concat(ALL_EVAL_GENERAL_RESULTS, ignore_index=True)
    final_eval_general_df.to_csv(os.path.join(SAVE_RESULTS_PATH, 'Final_Test_General_Metrics.csv'), index=False)

    # Detailed per-species results
    final_eval_species_df = pd.concat(ALL_EVAL_SPECIES_RESULTS, ignore_index=True)
    final_eval_species_df.to_csv(os.path.join(SAVE_RESULTS_PATH, 'Final_Test_Species_Metrics.csv'), index=False)

    # Confusion matrices (Excel workbook, one sheet per model)
    with pd.ExcelWriter(os.path.join(SAVE_RESULTS_PATH, 'Final_Test_Confusion_Matrices.xlsx'), engine='xlsxwriter') as writer:
        for cm_df in ALL_EVAL_CMS:
            if not cm_df.empty:
                # Excel limits sheet names to 31 characters.
                model_name_safe = cm_df['Model'].iloc[0].replace('-', '_')[:31]
                cm_df_to_save = cm_df.drop(columns=['Model'])
                cm_df_to_save.to_excel(writer, sheet_name=model_name_safe)

    print("\n")
    print("="*50)
    print(" Evaluación de Prueba Finalizada con Éxito")
    print(f" Resultados guardados en Drive en la carpeta: {SAVE_RESULTS_PATH}")
    print("="*50)
