# In [None]:
### Import libraries

import torch
from torchvision.datasets import Food101
from torchvision import transforms, models
from torch.utils.data import DataLoader, Subset, Dataset, ConcatDataset
import random
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import time

"""### Configuración inicial"""
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando dispositivo: {device}")

# Seed every random number generator used by this script so runs are repeatable.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

"""### Transformaciones"""

# Per-channel statistics shared by both normalisation steps below.
# NOTE(review): these look like the usual ImageNet mean/std - confirm.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Deterministic preprocessing (no data augmentation): resize, convert, normalise.
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Training-time preprocessing with random augmentation.
_pil_augmentations = [
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    # RandomErasing is placed after ToTensor/Normalize, so it acts on the tensor.
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.2), ratio=(0.3, 3.3)),
]
augment_transform = transforms.Compose(_pil_augmentations + _tensor_steps)

"""### Cargar datasets base"""

# Minimal transform (resize + tensor conversion, no normalisation) attached to
# the raw Food101 datasets; the original comment describes it as being for
# visualisation.
viz_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

print("Cargando datasets...")
# Load (downloading when necessary) the train split first, then the test split.
train_dataset, test_dataset = (
    Food101(root='.', download=True, split=split_name, transform=viz_transform)
    for split_name in ('train', 'test')
)

def _split_positions_by_label(labels, wanted_label):
    """Return (positions whose label equals wanted_label, all other positions)."""
    matching, others = [], []
    for position, label in enumerate(labels):
        (matching if label == wanted_label else others).append(position)
    return matching, others


# Position of the 'hot_dog' class in the Food101 class list.
hot_dog_index = train_dataset.classes.index('hot_dog')
print(f"Índice de 'hot_dog': {hot_dog_index}")

# Partition the sample positions of each split into hot dog / everything else.
targets = train_dataset._labels
hotdog_indices, not_hotdog_indices = _split_positions_by_label(targets, hot_dog_index)

test_targets = test_dataset._labels
test_hotdog_indices, test_not_hotdog_indices = _split_positions_by_label(test_targets, hot_dog_index)

print(f"\nDatos disponibles:")
print(f"Train - Hot dogs: {len(hotdog_indices)}, No hot dogs: {len(not_hotdog_indices)}")
print(f"Test - Hot dogs: {len(test_hotdog_indices)}, No hot dogs: {len(test_not_hotdog_indices)}")

"""### Dataset binario personalizado"""

class BinaryFoodDataset(Dataset):
    """Binary-label view over selected samples of an existing image dataset.

    Sample ``i`` of this dataset is the image found at position ``indices[i]``
    of ``original_dataset`` paired with ``labels[i]``; the label stored in the
    wrapped dataset is ignored.

    Args:
        original_dataset: Indexable dataset returning ``(image, label)`` pairs.
        indices: Positions in ``original_dataset`` to expose.
        labels: Binary label (0.0 or 1.0) for each entry of ``indices``.
        transform: Callable applied to every image. When ``None`` the
            module-level ``base_transform`` is used instead.

    Raises:
        ValueError: If ``indices`` and ``labels`` have different lengths.
    """

    def __init__(self, original_dataset, indices, labels, transform=None):
        # Fail fast: a mismatch would otherwise only surface as an IndexError
        # in the middle of an epoch (or be silently ignored if labels is longer).
        if len(indices) != len(labels):
            raise ValueError(
                "indices and labels must have the same length "
                f"(got {len(indices)} and {len(labels)})"
            )
        self.original_dataset = original_dataset
        self.indices = indices
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of exposed samples."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` where label is a float32 tensor of shape (1,)."""
        source_idx = self.indices[idx]

        # Only the image is needed; the wrapped dataset's own label is discarded.
        image, _ = self.original_dataset[source_idx]

        # The wrapped dataset may already yield tensors; convert back to PIL
        # so the transform pipeline receives a PIL image.
        if torch.is_tensor(image):
            image = transforms.ToPILImage()(image)

        # Explicit None check so a falsy-but-valid callable (for example an
        # empty nn.Sequential) is not mistaken for "no transform".
        active_transform = self.transform if self.transform is not None else base_transform
        image = active_transform(image)

        label = torch.tensor([self.labels[idx]], dtype=torch.float32)
        return image, label

"""### Dataset con repetición para data augmentation"""

class AugmentedHotdogDataset(Dataset):
    """Virtual dataset of ``target_size`` positive samples drawn with repetition.

    Every access picks one of ``hotdog_indices`` at random (the requested
    position is ignored), loads that image from ``original_dataset`` and runs
    it through ``transform``. The label is always 1.0.
    """

    def __init__(self, original_dataset, hotdog_indices, target_size, transform):
        self.transform = transform
        self.target_size = target_size
        self.hotdog_indices = hotdog_indices
        self.original_dataset = original_dataset

    def __len__(self):
        """Return the nominal dataset size, not the number of distinct images."""
        return self.target_size

    def __getitem__(self, idx):
        """Return ``(transformed image, tensor([1.0]))`` for a randomly chosen hot dog."""
        # `idx` is intentionally unused: the source image is drawn at random.
        picked = random.choice(self.hotdog_indices)
        sample = self.original_dataset[picked]
        image = sample[0]

        # The transform expects a PIL image, so undo any tensor conversion.
        if isinstance(image, torch.Tensor):
            to_pil = transforms.ToPILImage()
            image = to_pil(image)

        positive_label = torch.ones(1, dtype=torch.float32)
        return self.transform(image), positive_label

"""### Preparar datasets de test (diferentes para cada experimento)"""

def create_balanced_test_loader():
    """Build the balanced test loader used by Experiment 1.

    All test hot dogs are kept and an equally sized random sample of
    non-hot-dog test images is added. The global ``random`` module is
    re-seeded with 42 first, so the same negatives are drawn on every call.

    Returns:
        DataLoader over the balanced test set (batch size 32, no shuffling).
    """
    random.seed(42)
    n_positive = len(test_hotdog_indices)
    sampled_negatives = random.sample(test_not_hotdog_indices, n_positive)

    # Positives first, then negatives; labels follow the same order.
    balanced_ds = BinaryFoodDataset(
        test_dataset,
        test_hotdog_indices + sampled_negatives,
        [1.0] * n_positive + [0.0] * len(sampled_negatives),
        transform=base_transform,
    )

    loader = DataLoader(balanced_ds, batch_size=32, shuffle=False, num_workers=2)
    print(f"Test set balanceado: {len(balanced_ds)} muestras ({n_positive} hot dogs, {len(sampled_negatives)} no hot dogs)")
    return loader

def create_full_test_loader():
    """Build the test loader over the entire test split (Experiments 2 and 3).

    Every test image is used: hot dogs are labelled 1.0 and all other classes
    0.0, so the class imbalance of the split is kept.

    Returns:
        DataLoader over the full test set (batch size 32, no shuffling).
    """
    n_positive = len(test_hotdog_indices)
    n_negative = len(test_not_hotdog_indices)

    # Positives first, then negatives; labels follow the same order.
    full_ds = BinaryFoodDataset(
        test_dataset,
        test_hotdog_indices + test_not_hotdog_indices,
        [1.0] * n_positive + [0.0] * n_negative,
        transform=base_transform,
    )

    loader = DataLoader(full_ds, batch_size=32, shuffle=False, num_workers=2)
    print(f"Test set completo: {len(full_ds)} muestras ({n_positive} hot dogs, {n_negative} no hot dogs)")
    return loader

"""### Modelo CNN personalizado"""

class CNNBinary(nn.Module):
    """Small CNN that outputs one sigmoid probability per input image.

    Three Conv(3x3) -> ReLU -> MaxPool(2) stages widen the channels
    3 -> 16 -> 32 -> 64 while halving the spatial size each time. The
    classifier's first linear layer expects 64*28*28 features, i.e. a
    224x224 input (224 / 2 / 2 / 2 = 28).
    """

    def __init__(self):
        super().__init__()

        # Build the convolutional stages from the channel progression.
        channel_plan = [3, 16, 32, 64]
        stage_layers = []
        for in_channels, out_channels in zip(channel_plan, channel_plan[1:]):
            stage_layers.append(nn.Conv2d(in_channels, out_channels, 3, padding=1))
            stage_layers.append(nn.ReLU())
            stage_layers.append(nn.MaxPool2d(2))
        self.features = nn.Sequential(*stage_layers)

        flattened_size = 64 * 28 * 28
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened_size, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return probabilities of shape (batch, 1) for a batch of images."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps)

"""### Función de entrenamiento común"""

def train_model(model, train_loader, test_loader, model_name, n_epochs=5, reset_weights=None):
    """Train a binary classifier and measure test accuracy after every epoch.

    The model must output probabilities in [0, 1] with the same shape as the
    labels: the loss is ``nn.BCELoss`` and predictions are obtained by
    thresholding the output at 0.5.

    Args:
        model: ``nn.Module`` to train; it is moved to the global ``device``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` batches, evaluated at the
            end of every epoch.
        model_name: Label printed in the training banner.
        n_epochs: Number of passes over ``train_loader``.
        reset_weights: Whether to re-initialise every layer that exposes
            ``reset_parameters`` before training. ``None`` (default) decides
            automatically: re-initialise only when *all* parameters are
            trainable. A model with frozen parameters (such as the pretrained
            ResNet18 built by ``experiment_3``) is left untouched, because
            resetting it would replace the pretrained weights with random
            values that the frozen layers could never recover from.

    Returns:
        Tuple ``(model, results, best_test_acc)`` where ``results`` is a list
        with one dict per epoch (keys ``epoch``, ``train_loss``, ``train_acc``,
        ``test_acc``) and ``best_test_acc`` is the highest per-epoch test
        accuracy, in percent.
    """
    print(f"\n{'='*50}")
    print(f"ENTRENANDO: {model_name}")
    print(f"{'='*50}")

    # Free cached GPU memory before each training run.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    if reset_weights is None:
        # Frozen parameters signal a pretrained model prepared for fine-tuning.
        reset_weights = all(param.requires_grad for param in model.parameters())

    if reset_weights:
        def _reinitialise(module):
            if hasattr(module, 'reset_parameters'):
                module.reset_parameters()

        model.apply(_reinitialise)

    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

    best_test_acc = 0.0
    results = []

    start_time = time.time()

    for epoch in range(n_epochs):
        # ----- Training -----
        model.train()
        total_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # the epoch average is per-sample even if the last batch is smaller.
            total_loss += loss.item() * labels.size(0)
            predicted = (outputs > 0.5).float()
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        avg_loss = total_loss / total_train
        train_acc = 100.0 * correct_train / total_train

        # ----- Evaluation -----
        model.eval()
        correct_test = 0
        total_test = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                predicted = (outputs > 0.5).float()
                correct_test += (predicted == labels).sum().item()
                total_test += labels.size(0)

        test_acc = 100.0 * correct_test / total_test

        if test_acc > best_test_acc:
            best_test_acc = test_acc

        results.append({
            'epoch': epoch + 1,
            'train_loss': avg_loss,
            'train_acc': train_acc,
            'test_acc': test_acc
        })

        print(f'Epoch {epoch+1}/{n_epochs}: Loss: {avg_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%')

    training_time = time.time() - start_time
    print(f"\nTiempo de entrenamiento: {training_time:.2f} segundos")
    print(f"Mejor Test Accuracy: {best_test_acc:.2f}%")

    return model, results, best_test_acc

"""### Función de evaluación detallada"""

def evaluate_detailed(model, test_loader, model_name):
    """Compute and print confusion-matrix metrics for a binary classifier.

    Model outputs are thresholded at 0.5 to obtain predictions; the positive
    class is label 1.

    Args:
        model: Trained ``nn.Module`` producing one probability per sample.
        test_loader: Iterable of ``(images, labels)`` batches.
        model_name: Label printed in the report header.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1`` (fractions
        in [0, 1]) plus the raw counts ``tp``, ``tn``, ``fp`` and ``fn``. Any
        metric whose denominator is zero is reported as 0.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = (outputs > 0.5).float()

            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_predictions = np.array(all_predictions).flatten()
    all_labels = np.array(all_labels).flatten()

    # Confusion-matrix counts.
    tp = np.sum((all_predictions == 1) & (all_labels == 1))
    tn = np.sum((all_predictions == 0) & (all_labels == 0))
    fp = np.sum((all_predictions == 1) & (all_labels == 0))
    fn = np.sum((all_predictions == 0) & (all_labels == 1))

    # Guard every ratio the same way so an empty loader yields 0 instead of nan.
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"\n{model_name} - Resultados detallados:")
    print(f"Accuracy: {accuracy*100:.2f}%")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F1-Score: {f1:.3f}")
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'tp': tp, 'tn': tn, 'fp': fp, 'fn': fn
    }

"""### EXPERIMENTO 1: Balanceo básico (750 muestras cada clase) - Test balanceado"""

def experiment_1():
    """Experiment 1: CNN trained on 750 hot dogs + 750 other images.

    No augmentation is applied (``base_transform``) and evaluation uses the
    balanced test loader.

    Returns:
        Tuple ``(model, per-epoch results, best test accuracy, test_loader)``.
    """
    print(f"\n🔥 EXPERIMENTO 1: BALANCEO BÁSICO (750 + 750) - TEST BALANCEADO")

    # Release cached GPU memory before starting.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Re-seed so the sampling below does not depend on earlier experiments.
    random.seed(42)
    torch.manual_seed(42)

    per_class = 750
    positive_indices = random.sample(hotdog_indices, per_class)
    negative_indices = random.sample(not_hotdog_indices, per_class)

    # Positives first, then negatives; labels follow the same order.
    train_ds = BinaryFoodDataset(
        train_dataset,
        positive_indices + negative_indices,
        [1.0] * per_class + [0.0] * per_class,
        transform=base_transform,
    )
    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)

    test_loader = create_balanced_test_loader()

    print(f"Dataset de entrenamiento: {len(train_ds)} muestras")

    # Fresh, randomly initialised CNN for this experiment.
    model1 = CNNBinary()
    print("✅ Modelo CNN creado desde cero para Experimento 1")

    model1, results, best_acc = train_model(
        model1, train_loader, test_loader, "CNN con Balanceo Básico"
    )

    # Drop the training data references and free cached GPU memory.
    del train_loader, train_ds
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return model1, results, best_acc, test_loader

"""### EXPERIMENTO 2: Data Augmentation (10,000 muestras cada clase) - Test completo"""

def experiment_2():
    """Experiment 2: CNN trained on 10,000 augmented hot dogs + 10,000 others.

    Hot dogs are oversampled with random augmentation; the negatives are a
    random sample of other classes passed through the same augmentation.
    Evaluation uses the full (imbalanced) test loader.

    Returns:
        Tuple ``(model, per-epoch results, best test accuracy, test_loader)``.
    """
    print(f"\n🚀 EXPERIMENTO 2: DATA AUGMENTATION (10,000 + 10,000) - TEST COMPLETO")

    # Release cached GPU memory before starting.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Experiment-specific seed (different from the other experiments).
    random.seed(123)
    torch.manual_seed(123)

    per_class = 10000

    # Positives: hot dogs re-drawn at random and augmented on every access.
    positives_ds = AugmentedHotdogDataset(
        train_dataset,
        hotdog_indices,
        target_size=per_class,
        transform=augment_transform,
    )

    # Negatives: a random sample of non-hot-dog training images.
    negative_indices = random.sample(not_hotdog_indices, per_class)
    negatives_ds = BinaryFoodDataset(
        train_dataset,
        negative_indices,
        [0.0] * per_class,
        transform=augment_transform,
    )

    full_train_ds = ConcatDataset([positives_ds, negatives_ds])
    train_loader = DataLoader(full_train_ds, batch_size=32, shuffle=True, num_workers=2)

    test_loader = create_full_test_loader()

    print(f"Dataset de entrenamiento: {len(full_train_ds)} muestras")

    # Fresh, randomly initialised CNN for this experiment.
    model2 = CNNBinary()
    print("✅ Modelo CNN creado desde cero para Experimento 2")

    model2, results, best_acc = train_model(
        model2, train_loader, test_loader, "CNN con Data Augmentation", n_epochs=7
    )

    # Drop the training data references and free cached GPU memory.
    del train_loader, full_train_ds, positives_ds, negatives_ds
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return model2, results, best_acc, test_loader

"""### EXPERIMENTO 3: ResNet18 Preentrenado - Test completo"""

def experiment_3():
    """Experiment 3: ImageNet-pretrained ResNet18 on the augmented 10k + 10k data.

    The training data is built as in Experiment 2 (augmented hot dogs plus a
    random sample of other classes, with a different seed). The network is
    loaded with pretrained weights, every parameter is frozen except those of
    ``layer4``, and the final layer is replaced by a Dropout -> Linear(1) ->
    Sigmoid head before the model is handed to ``train_model``. Evaluation
    uses the full (imbalanced) test loader.

    Returns:
        Tuple ``(model, per-epoch results, best test accuracy, test_loader)``.
    """
    print(f"\n🎯 EXPERIMENTO 3: RESNET18 PREENTRENADO (10,000 + 10,000) - TEST COMPLETO")

    # Release cached GPU memory before starting.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Experiment-specific seed (different from the other experiments).
    random.seed(456)
    torch.manual_seed(456)

    # Positives: hot dogs re-drawn at random and augmented on every access.
    hotdog_augmented_ds = AugmentedHotdogDataset(
        train_dataset,
        hotdog_indices,
        target_size=10000,
        transform=augment_transform
    )

    # Negatives: a random sample of non-hot-dog training images.
    selected_not_hotdog_indices = random.sample(not_hotdog_indices, 10000)
    not_hotdog_labels = [0.0] * 10000

    not_hotdog_ds = BinaryFoodDataset(
        train_dataset,
        selected_not_hotdog_indices,
        not_hotdog_labels,
        transform=augment_transform
    )

    combined_ds = ConcatDataset([hotdog_augmented_ds, not_hotdog_ds])
    train_loader = DataLoader(combined_ds, batch_size=32, shuffle=True, num_workers=2)

    test_loader = create_full_test_loader()

    print(f"Dataset de entrenamiento: {len(combined_ds)} muestras")

    # Load a fresh ResNet18 with ImageNet weights. Newer torchvision releases
    # deprecate `pretrained=True` in favour of the `weights=` argument, so use
    # it when available and fall back to the legacy flag otherwise.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model3 = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model3 = models.resnet18(pretrained=True)
    print("✅ Modelo ResNet18 cargado NUEVO desde pretrained para Experimento 3")

    # Fine-tuning setup: freeze everything, then unfreeze the last residual
    # stage. (The average-pooling layer has no parameters, so there is nothing
    # to unfreeze there.)
    for param in model3.parameters():
        param.requires_grad = False
    for param in model3.layer4.parameters():
        param.requires_grad = True

    # Replace the classification head with a single-probability output; the
    # newly created layers are trainable by default.
    num_features = model3.fc.in_features
    model3.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 1),
        nn.Sigmoid()
    )

    model3, results, best_acc = train_model(model3, train_loader, test_loader, "ResNet18 Preentrenado", n_epochs=7)

    # Drop the training data references and free cached GPU memory.
    del train_loader, combined_ds, hotdog_augmented_ds, not_hotdog_ds
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return model3, results, best_acc, test_loader

"""### Función principal para ejecutar todos los experimentos"""

def _test_set_composition(detailed):
    """Return (positives, negatives) in a test set from its confusion-matrix counts."""
    positives = int(detailed['tp'] + detailed['fn'])
    negatives = int(detailed['tn'] + detailed['fp'])
    return positives, negatives


def run_all_experiments():
    """Run the three experiments in order, evaluate each one and print a summary.

    Returns:
        List of ``(description, best test accuracy in percent, detailed
        metrics dict)`` tuples, one per experiment, in execution order.
    """
    results_summary = []

    print("🔥🚀🎯 INICIANDO LOS 3 EXPERIMENTOS 🎯🚀🔥")
    print("="*60)

    # (experiment function, name used in the detailed report, summary label)
    experiment_plan = [
        (experiment_1, "Experimento 1", "Balanceo Básico (750+750) - Test Balanceado"),
        (experiment_2, "Experimento 2", "Data Augmentation (10k+10k) - Test Completo"),
        (experiment_3, "Experimento 3", "ResNet18 Preentrenado (10k+10k) - Test Completo"),
    ]
    for run_experiment, report_name, summary_label in experiment_plan:
        model, _history, best_acc, test_loader = run_experiment()
        detailed = evaluate_detailed(model, test_loader, report_name)
        results_summary.append((summary_label, best_acc, detailed))

    # Final summary.
    print(f"\n{'='*60}")
    print("🏆 RESUMEN FINAL DE TODOS LOS EXPERIMENTOS 🏆")
    print(f"{'='*60}")

    for i, (name, best_acc, detailed) in enumerate(results_summary, 1):
        print(f"\n{i}. {name}:")
        print(f"   Mejor Test Accuracy: {best_acc:.2f}%")
        print(f"   Precision: {detailed['precision']:.3f}")
        print(f"   Recall: {detailed['recall']:.3f}")
        print(f"   F1-Score: {detailed['f1']:.3f}")

    # Report the real test-set sizes, derived from the confusion matrices,
    # instead of hard-coded figures that can drift from the data.
    balanced_pos, balanced_neg = _test_set_composition(results_summary[0][2])
    full_pos, full_neg = _test_set_composition(results_summary[1][2])

    print(f"\n📊 NOTA IMPORTANTE:")
    print(f"   - Experimento 1: Usa test balanceado ({balanced_pos:,} hot dogs + {balanced_neg:,} no hot dogs)")
    print(f"   - Experimentos 2 y 3: Usan test completo ({full_pos:,} hot dogs + {full_neg:,} no hot dogs)")
    print(f"   - Los resultados NO son directamente comparables debido a la diferencia en distribución")

    # Experiment 1 is the only run on the balanced test set; Experiments 2
    # and 3 share the full test set, so they are ranked against each other.
    best_balanced = results_summary[0]
    best_full = max(results_summary[1:], key=lambda x: x[1])

    print(f"\n🥇 MEJOR en Test Balanceado: {best_balanced[0]} con {best_balanced[1]:.2f}%")
    print(f"🥇 MEJOR en Test Completo: {best_full[0]} con {best_full[1]:.2f}%")

    return results_summary

"""### Ejecutar todos los experimentos"""

# Script entry point: run the three experiments only when this file is executed
# directly, keeping the returned summary list in `results`.
if __name__ == "__main__":
    results = run_all_experiments()