# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import matplotlib.pyplot as plt

# Convolutional autoencoder architecture
class ConvAutoencoder(nn.Module):
    """Symmetric convolutional autoencoder for 3-channel images.

    The encoder stacks four ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2)``
    stages that widen the channels 3 -> 32 -> 64 -> 128 -> 256; each pooling
    step halves the spatial size (128 -> 64 -> 32 -> 16 -> 8 for a 128x128
    input). The decoder mirrors it with four ``ConvTranspose2d`` layers of
    kernel size 2 and stride 2, each doubling the spatial size, separated by
    ReLU and finished with a Sigmoid so outputs lie in [0, 1].
    """

    def __init__(self):
        super().__init__()

        # Channel widths, from the input image to the bottleneck.
        widths = (3, 32, 64, 128, 256)

        # Encoder: one conv / activation / pooling triple per width step.
        down = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            down.extend((
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ))
        self.encoder = nn.Sequential(*down)

        # Decoder: walk the same widths backwards, 256 -> 128 -> 64 -> 32 -> 3.
        mirrored = widths[::-1]
        final_stage = len(mirrored) - 2
        up = []
        for stage, (c_in, c_out) in enumerate(zip(mirrored[:-1], mirrored[1:])):
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=2, stride=2))
            # Only the last stage uses Sigmoid, to squash the output to [0, 1].
            up.append(nn.Sigmoid() if stage == final_stage else nn.ReLU())
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction."""
        return self.decoder(self.encoder(x))

# Custom dataset for loading face images
class FaceDataset(Dataset):
    """Dataset of RGB images read from a single, flat directory.

    Only the entries directly inside ``root_dir`` are considered (no
    recursion). An entry is kept when its name ends with one of
    ``IMAGE_EXTENSIONS``, compared case-insensitively so that files such as
    ``IMG_001.JPG`` are not silently skipped.

    Args:
        root_dir: Directory containing the image files.
        transform: Optional callable applied to each loaded PIL image; its
            result is what ``__getitem__`` returns.

    Raises:
        OSError: From ``os.listdir`` if ``root_dir`` cannot be listed.
    """

    # File-name suffixes treated as images (matched case-insensitively).
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        self.image_files = sorted(
            name for name in os.listdir(root_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in ``root_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if requested."""
        img_path = os.path.join(self.root_dir, self.image_files[idx])
        # Image.open is lazy and keeps the file open; convert() decodes the
        # pixels into a new image, so the handle can be released right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image

# Image preprocessing pipeline: resize every image to 128x128, then convert
# it to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(128, 128)), transforms.ToTensor()]
)

# Training routine
def train_autoencoder(model, train_loader, num_epochs, device, lr=1e-3, log_interval=10):
    """Train ``model`` to reconstruct its own input using MSE loss and Adam.

    Args:
        model: Module whose output has the same shape as its input batch.
        train_loader: Sized iterable yielding input batches (tensors); each
            batch is moved to ``device`` and used as both input and target.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device the batches are moved to. The model is not moved
            here, so it is expected to already live on that device.
        lr: Learning rate passed to Adam.
        log_interval: Print the batch loss every ``log_interval`` batches;
            ``0`` or ``None`` disables per-batch logging.

    Returns:
        A list with the average batch loss of each epoch, in order.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when averaging the loss at the end of the first epoch.
        raise ValueError('train_loader is empty; there is nothing to train on')

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    epoch_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for batch_idx, data in enumerate(train_loader):
            img = data.to(device)

            # Forward pass: the target is the input itself.
            output = model(img)
            loss = criterion(output, img)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for accumulation and logging.
            batch_loss = loss.item()
            total_loss += batch_loss

            if log_interval and (batch_idx + 1) % log_interval == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{num_batches}], '
                      f'Loss: {batch_loss:.6f}')

        avg_loss = total_loss / num_batches
        epoch_losses.append(avg_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.6f}')

    return epoch_losses

# Visualize reconstructions
def visualize_results(model, dataset, num_images=5, device='cuda'):
    """Plot randomly chosen images next to the model's reconstructions.

    One shuffled batch of up to ``num_images`` samples is drawn from
    ``dataset`` and passed through ``model``; originals are drawn on the top
    row and reconstructions on the bottom row of a single figure.

    Args:
        model: Module mapping an image batch to a batch of the same layout.
            It is switched to eval mode and left in that mode.
        dataset: Dataset yielding image tensors. Each sample is permuted with
            ``(1, 2, 0)`` before plotting, so it is assumed to be
            channel-first (C, H, W) -- confirm against the dataset in use.
        num_images: Maximum number of image pairs to display.
        device: Device the batch is moved to before the forward pass.
    """
    model.eval()
    dataloader = DataLoader(dataset, batch_size=num_images, shuffle=True)

    with torch.no_grad():
        images = next(iter(dataloader))
        images = images.to(device)
        outputs = model(images)

    # The batch holds fewer than num_images samples when the dataset is
    # smaller than that; size the grid from what was actually loaded so the
    # loop never indexes past the end of the batch.
    shown = images.size(0)

    # Move both batches to the CPU once instead of once per plotted image.
    originals = images.cpu()
    reconstructions = outputs.cpu()

    # Show original and reconstructed images
    plt.figure(figsize=(12, 4))

    for i in range(shown):
        # Original image (top row)
        plt.subplot(2, shown, i + 1)
        plt.imshow(originals[i].permute(1, 2, 0))
        plt.axis('off')
        if i == 0:
            plt.title('Original')

        # Reconstructed image (bottom row)
        plt.subplot(2, shown, i + shown + 1)
        plt.imshow(reconstructions[i].permute(1, 2, 0))
        plt.axis('off')
        if i == 0:
            plt.title('Reconstruida')

    plt.tight_layout()
    plt.show()

# Script entry point
def main():
    """Train the autoencoder on a folder of face images and save its weights.

    Steps: pick the device, build the dataset and loader, train for 50
    epochs, plot a few reconstructions, then write the model's state dict to
    ``face_autoencoder.pth``.
    """
    # Select the device: CUDA when available, otherwise CPU
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f"Usando dispositivo: {device}")

    # Build the dataset (adjust the path to your image directory)
    faces = FaceDataset(root_dir='path/to/face/images', transform=transform)
    loader = DataLoader(faces, batch_size=32, shuffle=True)

    # Create the model and move it to the selected device
    autoencoder = ConvAutoencoder()
    autoencoder = autoencoder.to(device)

    # Train the model
    train_autoencoder(autoencoder, loader, num_epochs=50, device=device)

    # Show originals next to their reconstructions
    visualize_results(autoencoder, faces, device=device)

    # Persist the learned weights
    weights = autoencoder.state_dict()
    torch.save(weights, 'face_autoencoder.pth')


if __name__ == '__main__':
    main()