# In [None]:  (Jupyter cell marker, kept as a comment so the file parses as Python)
import torch
# Report the installed PyTorch build and whether a CUDA device can be used.
print(f"PyTorch: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA disponible: {cuda_ready}")
print(f"CUDA version: {torch.version.cuda}")
# Only query the device name when CUDA is usable; otherwise fall back to a label.
gpu_label = torch.cuda.get_device_name(0) if cuda_ready else 'No GPU'
print(f"GPU: {gpu_label}")


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, classification_report, confusion_matrix
import numpy as np
import os
import time
import copy
from pathlib import Path
import pandas as pd
from torch.utils.data import DataLoader
from datetime import datetime


# --- 1. Función Auxiliar: Definición de parámetros para Fine-Tuning ---
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when doing feature extraction.

    Args:
        model: Module whose ``parameters()`` are visited.
        feature_extracting: When truthy, ``requires_grad`` is switched off on
            each parameter; when falsy the model is left untouched.

    Returns:
        None. The model is modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False


# --- 2. Función Clave: Inicialización de Modelos con Tamaños Óptimos ---
# Nota: Todos los tamaños de entrada son los estándares recomendados por las especificaciones de ImageNet.
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision backbone whose classification head has ``num_classes`` outputs.

    Args:
        model_name: One of "VGG11-BN", "DenseNet-201", "ResNet-101",
            "MobileNetV2" or "inception" (exact, case-sensitive match).
        num_classes: Number of output units of the replacement head(s).
        feature_extract: Forwarded to ``set_parameter_requires_grad``. When
            truthy the loaded layers are frozen *before* the head is replaced,
            so the freshly created ``nn.Linear`` head stays trainable.
        use_pretrained: Forwarded as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        Tuple ``(model, input_size)`` where ``input_size`` is the square image
        side fed to the network (299 for Inception v3, 224 for the others).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # NOTE(review): recent torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=``; kept unchanged to match the installed version.
    if model_name == "VGG11-BN":
        # VGG11 with batch norm: the last classifier layer (index 6) is the head.
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "DenseNet-201":
        # DenseNet-201: the head is a single Linear stored in ``classifier``.
        model_ft = models.densenet201(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "ResNet-101":
        # ResNet-101: the head is the ``fc`` layer.
        model_ft = models.resnet101(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "MobileNetV2":
        # MobileNetV2: the head is the Linear at index 1 of ``classifier``.
        model_ft = models.mobilenet_v2(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[1].in_features
        model_ft.classifier[1] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3: aux_logits=True is requested so the pretrained weights
        # load, and both the auxiliary and the main heads are resized.
        model_ft = models.inception_v3(pretrained=use_pretrained, aux_logits=True)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, num_classes)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
        input_size = 299

    else:
        # Raise instead of calling the interactive-only ``exit()`` helper so the
        # caller (or the notebook) gets a normal, catchable error that names
        # the offending value.
        raise ValueError(f"Nombre de modelo inválido: {model_name!r}")

    return model_ft, input_size


# --- 3. Training and validation loop (tracks the best validation epoch) ---
def train_model(model, dataloaders, criterion, optimizer, save_path, num_epochs=30, is_inception=False, model_name=""):
    """Train and validate ``model``, keeping the weights of the best validation epoch.

    Each epoch runs a 'train' phase followed by a 'val' phase. Whenever the
    validation accuracy improves on the best value so far, the weights are
    snapshotted, a checkpoint is written to ``<save_path>/best.pt`` and the
    per-class report and confusion matrix of that epoch are kept.

    Args:
        model: Network to train; it is moved to CUDA device 0 when available,
            otherwise to the CPU.
        dataloaders: Mapping with 'train' and 'val' loaders yielding
            ``(inputs, labels)`` batches. ``dataloaders['val'].dataset`` must
            expose ``classes`` (the class names).
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepping the model parameters.
        save_path: Directory for the checkpoint; created if missing.
        num_epochs: Number of epochs to run.
        is_inception: When true, the training forward pass is expected to
            return ``(outputs, aux_outputs)`` and the loss is
            ``main + 0.4 * auxiliary``.
        model_name: Label copied into the 'Model' column of every result row.

    Returns:
        Tuple of three DataFrames:
        the per-epoch/per-phase history (Model, Epoch, Phase, Loss, Acc,
        F1_micro, Time_sec); the per-class metrics of the best validation
        epoch; and that epoch's confusion matrix (rows: true class, columns:
        predicted class) with a leading 'Model' column. The last one is an
        empty DataFrame when no epoch exceeded a validation accuracy of 0.

    Side effects:
        Writes ``best.pt`` under ``save_path`` and leaves ``model`` loaded with
        the best validation weights.
    """
    save_dir = Path(save_path)
    save_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = save_dir / 'best.pt'

    since = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # One row per (epoch, phase) for the general history table.
    general_results_data = []

    # Per-class metrics and confusion matrix of the best validation epoch.
    best_species_data = []
    best_cm_data = None
    best_epoch_number = -1

    class_names = dataloaders['val'].dataset.classes
    # Explicit label ids keep the report and the confusion matrix at one
    # row/column per class even if a class is missing from both the labels and
    # the predictions of an epoch.
    # Assumes targets are the integer indices 0..len(classes)-1 into
    # ``classes`` (as torchvision's ImageFolder produces) — TODO confirm for
    # other dataset types.
    label_ids = list(range(len(class_names)))

    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            start_phase_time = time.time()

            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            all_labels = []
            all_preds = []

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Training forward pass yields main and auxiliary
                        # outputs; the auxiliary loss is weighted by 0.4.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so the epoch loss is
                # a per-sample average.
                running_loss += loss.item() * inputs.size(0)

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())

            time_elapsed_phase = time.time() - start_phase_time

            epoch_loss = running_loss / len(dataloaders[phase].dataset)

            epoch_acc = accuracy_score(all_labels, all_preds)
            epoch_f1_score_micro = f1_score(all_labels, all_preds, average='micro', zero_division=0)

            print('{} Loss: {:.4f} Acc: {:.4f} F1 (micro): {:.4f}'.format(
                  phase, epoch_loss, epoch_acc, epoch_f1_score_micro))

            general_results_data.append({
                'Model': model_name,
                'Epoch': epoch,
                'Phase': phase,
                'Loss': epoch_loss,
                'Acc': epoch_acc,
                'F1_micro': epoch_f1_score_micro,
                'Time_sec': time_elapsed_phase
            })

            # Keep the best model according to validation accuracy.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch_number = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

                # Store the epoch-level validation loss (a plain float) rather
                # than the loss tensor of whichever mini-batch came last.
                torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': epoch_loss},
                           checkpoint_path)

                report = classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names, output_dict=True, zero_division=0)

                # Keep only the per-class entries; the report also contains
                # aggregate rows that are not species.
                best_species_data = []
                for class_name, metrics in report.items():
                    if class_name in class_names:
                        best_species_data.append({
                            'Model': model_name,
                            'Best_Acc': best_acc,
                            'Species': class_name,
                            'Precision': metrics['precision'],
                            'Recall': metrics['recall'],
                            'F1_Score': metrics['f1-score'],
                            'Support': metrics['support']
                        })

                best_cm_data = confusion_matrix(all_labels, all_preds, labels=label_ids)

        # Report the duration of the whole epoch (train + val), matching the label.
        time_elapsed_epoch = time.time() - epoch_start_time
        print('Tiempo total de Época: {:.0f}m {:.0f}s'.format(time_elapsed_epoch // 60, time_elapsed_epoch % 60))

    time_elapsed_total = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed_total // 60, time_elapsed_total % 60))
    print('Best val acc: {:.4f} en época {}'.format(best_acc, best_epoch_number))
    model.load_state_dict(best_model_wts)

    best_species_df = pd.DataFrame(best_species_data)

    # Confusion matrix as a labelled table (rows: true, columns: predicted).
    if best_cm_data is not None:
        best_cm_df = pd.DataFrame(best_cm_data, index=class_names, columns=class_names)
        best_cm_df.insert(0, 'Model', model_name)
    else:
        best_cm_df = pd.DataFrame()

    return pd.DataFrame(general_results_data), best_species_df, best_cm_df


# --- 1. Bloque de Entrenamiento Comparativo ---


# Modelos a entrenar y acumulación de resultados
# Architectures compared in this study; every entry carries the same optimizer tag.
MODELS_TO_RUN = [
    {"name": architecture, "optimizer": "SGD"}
    for architecture in (
        "VGG11-BN",
        "DenseNet-201",
        "ResNet-101",
        "MobileNetV2",
        "inception",
    )
]

# Accumulators for the per-model result tables (three independent lists).
ALL_GENERAL_RESULTS, ALL_SPECIES_RESULTS, ALL_CMS = [], [], []

# Shared experiment settings.
data_dir = './plantas_dataset'
num_classes, batch_size, num_epochs = 12, 8, 30
feature_extract = False
criterion = nn.CrossEntropyLoss()
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


# ImageNet channel statistics documented by torchvision for its pretrained
# models. The blue-channel std is 0.225 (it was mistyped as 0.255).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

for config in MODELS_TO_RUN:
    model_name = config['name']

    print(f"\n{'='*30}\n INICIANDO ENTRENAMIENTO: {model_name}\n{'='*30}")

    # Build the model first: its input_size drives the image transforms below.
    model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
    model_ft = model_ft.to(device)

    # 2. Data transforms (augmentation is applied to the training split only).
    normalize = transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize((input_size, input_size)),
            transforms.RandomRotation(degrees=15),
            transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 2.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        'val': transforms.Compose([
            transforms.Resize((input_size, input_size)),
            # After the square resize this crop keeps the full image.
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]),
    }

    # 3. DataLoaders: one ImageFolder per split; only the training split is
    # shuffled, validation is read in a fixed order.
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
    dataloader_dict = {
        x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=(x == 'train'), num_workers=4)
        for x in ['train', 'val']
    }

    # 4. Optimizer: SGD with lr=0.001 and momentum=0.9 for every model
    # (the 'optimizer' entry of the config is not consulted here).
    params_to_update = model_ft.parameters()
    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    # 5. Training: checkpoints go to a directory named after the model.
    save_path = model_name.replace('-', '_').upper()
    is_inception = (model_name == "inception")

    general_df, species_df, cm_df = train_model(
        model_ft, dataloader_dict, criterion, optimizer_ft,
        save_path=save_path, num_epochs=num_epochs,
        is_inception=is_inception, model_name=model_name
    )

    # 6. Accumulate this model's tables for the final export.
    ALL_GENERAL_RESULTS.append(general_df)
    ALL_SPECIES_RESULTS.append(species_df)
    ALL_CMS.append(cm_df)


# --- 7. Export results to CSV and Excel ---

# Epoch-by-epoch history of every model, stacked into one table.
final_general_df = pd.concat(ALL_GENERAL_RESULTS, ignore_index=True)
final_general_df.to_csv('General_Results_History.csv', index=False)

# Per-species metrics taken at each model's best validation epoch.
final_species_df = pd.concat(ALL_SPECIES_RESULTS, ignore_index=True)
final_species_df.to_csv('Species_Metrics_Best_Epoch.csv', index=False)

# Confusion matrices of the best epoch: one worksheet per model, named after
# the model with '-' replaced by '_'. Models without a matrix are skipped.
with pd.ExcelWriter('Confusion_Matrices.xlsx', engine='xlsxwriter') as writer:
    for matrix_df in ALL_CMS:
        if matrix_df.empty:
            continue
        sheet_name = matrix_df['Model'].iloc[0].replace('-', '_')
        # The 'Model' column only identifies the sheet; it is not written out.
        matrix_df.drop(columns=['Model']).to_excel(writer, sheet_name=sheet_name)

# Summary of the files that were produced.
for message in (
    "\nExportación finalizada:\n",
    "- 'General_Results_History.csv' (Resultados generales, epoch-by-epoch)",
    "- 'Species_Metrics_Best_Epoch.csv' (Resultados por especie de la mejor época)",
    "- 'Confusion_Matrices.xlsx' (Matrices de confusión, una hoja por modelo)",
):
    print(message)
