In [35]:
import os
from PIL import Image
import torch
import torchvision
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
import re
import shutil


In [15]:
directory = "./Datos/real_and_fake_face"


## Procesamiento de datos

In [4]:
def normalize_fake_images(original_directory, normalized_directory):
    # Crear el directorio normalizado si no existe
    os.makedirs(normalized_directory, exist_ok=True)

    category = "training_fake"
    category_path = os.path.join(original_directory, category)

    if os.path.isdir(category_path):
        normalized_category_path = os.path.join(normalized_directory, category)
        os.makedirs(normalized_category_path, exist_ok=True)

        for filename in os.listdir(category_path):
            if filename.endswith((".jpg", ".png")):
                img_path = os.path.join(category_path, filename)

                # Extraer información del nombre del archivo
                parts = filename.split('_')
                difficulty_part = parts[1] if len(parts) > 1 else None
                number_part = parts[2] if len(parts) > 2 else None

                # Construir el nuevo nombre basado en la información extraída
                new_filename = f"fake_{difficulty_part}_{number_part}" if difficulty_part else f"fake_{number_part}"
                new_path = os.path.join(normalized_category_path, new_filename)

                # Verificar si el archivo con el nuevo nombre ya existe, y cambiar el nombre si es necesario
                count = 1
                while os.path.exists(new_path):
                    new_filename = f"fake_{difficulty_part}_{number_part}_{count}" if difficulty_part else f"fake_{number_part}_{count}"
                    new_path = os.path.join(normalized_category_path, new_filename)
                    count += 1

                try:
                    # Renombrar el archivo
                    os.rename(img_path, new_path)
                    print("Renamed:", filename, "to", new_filename)
                except Exception as e:
                    print(f"Error renaming {filename}: {e}")

# Directorios originales
image_directory_fake = "./Datos/real_and_fake_face/training_fake"

# Directorios normalizados
normalized_image_directory = "./Datos/normalized_real_and_fake_face"

# Normalizar solo los archivos de la carpeta training_fake
normalize_fake_images(image_directory_fake, normalized_image_directory)

# Verificar si los nombres de los archivos se han cambiado correctamente
print("Archivos después de la normalización:")
for filename in os.listdir(normalized_image_directory):
    print(filename)


Archivos después de la normalización:


## Carga y transformaciones

In [18]:
# Aumento de datos y transformaciones
train_transform = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Cargar datos
train_dataset = datasets.ImageFolder(directory, transform=train_transform)
val_dataset = datasets.ImageFolder(directory, transform=val_transform)

# Cargador de datos
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=True)

## Definición del modelo y entrenamiento

In [9]:
# Definir el modelo
resnet18_model = models.resnet18(pretrained=True)
resnet18_model.fc = nn.Sequential(
    nn.Linear(512, 256),
    nn.ELU(),
    nn.Dropout(0.5),
    nn.Linear(256, 1)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet18_model = resnet18_model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet18_model.parameters(), lr=0.0001)

# Número de divisiones para la validación cruzada
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Número de épocas
epochs = 5

for epoch in range(epochs):
    print(f"Epoch [{epoch + 1}/{epochs}]")

    # Validación cruzada
    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        train_fold = torch.utils.data.Subset(train_dataset, train_idx)
        val_fold = torch.utils.data.Subset(train_dataset, val_idx)

        train_loader = torch.utils.data.DataLoader(train_fold, batch_size=128, shuffle=True, num_workers=4)
        val_loader = torch.utils.data.DataLoader(val_fold, batch_size=128, shuffle=True, num_workers=4)

        resnet18_model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = resnet18_model(inputs)
            loss = criterion(outputs.squeeze(), labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            predictions = (outputs > 0.5).float()
            correct_predictions += (predictions == labels.unsqueeze(1)).sum().item()
            total_samples += labels.size(0)

        average_loss = running_loss / len(train_loader)
        accuracy = correct_predictions / total_samples
        print(f'Fold {fold + 1}/{n_splits}, Train Loss: {average_loss:.4f}, Train Accuracy: {accuracy:.2%}')

        resnet18_model.eval()
        correct_predictions_val = 0
        total_samples_val = 0

        with torch.no_grad():
            for inputs_val, labels_val in val_loader:
                inputs_val, labels_val = inputs_val.to(device), labels_val.to(device)

                outputs_val = resnet18_model(inputs_val)
                predictions_val = (outputs_val > 0.5).float()

                correct_predictions_val += (predictions_val == labels_val.unsqueeze(1)).sum().item()
                total_samples_val += labels_val.size(0)

        accuracy_val = correct_predictions_val / total_samples_val
        print(f'Fold {fold + 1}/{n_splits}, Validation Accuracy: {accuracy_val:.2%}')



Epoch [1/5]
Fold 1/5, Train Loss: 0.6933, Train Accuracy: 52.82%
Fold 1/5, Validation Accuracy: 52.32%
Fold 2/5, Train Loss: 0.6694, Train Accuracy: 54.26%
Fold 2/5, Validation Accuracy: 61.76%
Fold 3/5, Train Loss: 0.6526, Train Accuracy: 58.54%
Fold 3/5, Validation Accuracy: 59.80%
Fold 4/5, Train Loss: 0.6340, Train Accuracy: 60.13%
Fold 4/5, Validation Accuracy: 63.48%
Fold 5/5, Train Loss: 0.6105, Train Accuracy: 64.85%
Fold 5/5, Validation Accuracy: 58.09%
Epoch [2/5]
Fold 1/5, Train Loss: 0.5812, Train Accuracy: 65.38%
Fold 1/5, Validation Accuracy: 68.46%
Fold 2/5, Train Loss: 0.5642, Train Accuracy: 67.91%
Fold 2/5, Validation Accuracy: 72.06%
Fold 3/5, Train Loss: 0.5465, Train Accuracy: 69.14%
Fold 3/5, Validation Accuracy: 70.34%
Fold 4/5, Train Loss: 0.5277, Train Accuracy: 71.34%
Fold 4/5, Validation Accuracy: 67.89%
Fold 5/5, Train Loss: 0.5047, Train Accuracy: 73.24%
Fold 5/5, Validation Accuracy: 76.72%
Epoch [3/5]
Fold 1/5, Train Loss: 0.4763, Train Accuracy: 75.18%
F

## Evaluación del rendimiento

In [11]:
def get_label(value, threshold=0.5):
    return 'real' if value > threshold else 'fake'

# Validación del modelo después de todas las épocas
resnet18_model.eval()
correct_predictions_val = 0
total_samples_val = 0
predicted_labels_val = []

with torch.no_grad():
    for inputs_val, labels_val in val_loader:
        inputs_val, labels_val = inputs_val.to(device), labels_val.to(device)

        outputs_val = resnet18_model(inputs_val)
        predictions_val = (outputs_val > 0.5).float()

        correct_predictions_val += (predictions_val == labels_val.unsqueeze(1)).sum().item()
        total_samples_val += labels_val.size(0)

        predicted_labels_val.extend(predictions_val.cpu().numpy().tolist())  # Guardar las predicciones

# Calcular métricas al final de todas las épocas en el conjunto de validación
accuracy_val = correct_predictions_val / total_samples_val
print(f'Validation - After all epochs, Accuracy: {accuracy_val:.2%}')

# Obtener un lote de imágenes y etiquetas del conjunto de validación
validation_images, validation_labels = next(iter(val_loader))

# Mover el modelo a la GPU si está disponible
resnet18_model = resnet18_model.to(device)

# Obtener predicciones del modelo
with torch.no_grad():
    resnet18_model.eval()
    outputs = resnet18_model(validation_images.to(device))
    predictions = torch.sigmoid(outputs)

# Umbral para clasificación binaria
threshold = 0.5

# Contadores para aciertos e incorrectos
correct_count = 0
incorrect_count = 0

# Validación sin mostrar imágenes y nombres
for i in range(len(validation_images)):
    predicted_label = get_label(predictions[i].item(), threshold)
    actual_label = get_label(validation_labels[i].item(), threshold)

    # Imprimir si la predicción fue correcta e incorrecta
    if predicted_label == actual_label:
        correct_count += 1
    else:
        incorrect_count += 1

# Imprimir el número total de muestras y resultados
print(f"Total de muestras evaluadas: {total_samples_val}")
print(f"Total de predicciones correctas: {correct_count}")
print(f"Total de predicciones incorrectas: {incorrect_count}")

Validation - After all epochs, Accuracy: 83.33%
Total de muestras evaluadas: 408
Total de predicciones correctas: 107
Total de predicciones incorrectas: 21
