In [19]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

In [20]:
# Preprocessing for FaceEmbeddingCNN (its first conv layer takes 1 channel):
# collapse to a single grayscale channel, resize to 96x96, convert to a tensor.
transform_gray = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(size=(96, 96)),
        transforms.ToTensor(),
    ]
)

# Preprocessing for CNNClasificador (its first conv layer takes 3 channels):
# same resize / tensor steps, without the grayscale conversion.
transform_rgb = transforms.Compose(
    [
        transforms.Resize(size=(96, 96)),
        transforms.ToTensor(),
    ]
)


In [21]:
class FaceEmbeddingCNN(nn.Module):
    """Small CNN that maps a single-channel image batch to 64-d embeddings.

    Three conv(3x3, padding=1) -> ReLU -> max-pool(2) stages grow the channels
    1 -> 16 -> 32 -> 64 while halving the spatial size each time. The dense
    head is sized for 64 * 12 * 12 features, i.e. a 96x96 input.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs give the (in, out) channels of each conv stage.
        canales = (1, 16, 32, 64)
        etapas = []
        for entrada, salida in zip(canales, canales[1:]):
            etapas.append(nn.Conv2d(entrada, salida, kernel_size=3, padding=1))
            etapas.append(nn.ReLU())
            etapas.append(nn.MaxPool2d(2))
        self.cnn = nn.Sequential(*etapas)

        # Dense head: flatten the feature maps, then 9216 -> 128 -> 64.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 12 * 12, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
        )

    def forward(self, x):
        """Return the embedding produced by the conv stack plus dense head."""
        mapas = self.cnn(x)
        return self.fc(mapas)


In [22]:
# Untrained baseline model, switched to evaluation mode
# (nn.Module.eval() returns the module itself, so the call can be chained).
model_simple = FaceEmbeddingCNN().eval()

# Load the two face photos to compare
img1, img2 = [
    Image.open(ruta)
    for ruta in ("rostros/AdrianCisneros.jpg", "rostros/JaimeLescano.jpg")
]

# Preprocess each photo and add a leading batch dimension
img1_tensor, img2_tensor = [
    torch.unsqueeze(transform_gray(foto), 0) for foto in (img1, img2)
]

# Compute the embeddings without tracking gradients
with torch.no_grad():
    emb1, emb2 = model_simple(img1_tensor), model_simple(img2_tensor)

# Comparison helper
def comparar_embeddings(emb1, emb2, threshold=0.6):
    """Print the distance between two embeddings and say whether they match.

    Args:
        emb1: First embedding tensor.
        emb2: Second embedding tensor (must be subtractable from ``emb1``).
        threshold: Distances strictly below this value count as a match.

    Returns:
        bool: True when the norm of ``emb1 - emb2`` is below ``threshold``.
    """
    diferencia = emb1 - emb2
    distancia = diferencia.norm().item()
    print(f"Distancia entre embeddings: {distancia:.4f}")
    coinciden = distancia < threshold
    return coinciden

# Compare the two embeddings and report the verdict
mensaje = (
    "🔒 Coincidencia: Mismo rostro detectado."
    if comparar_embeddings(emb1, emb2)
    else "❌ No coinciden: Rostros diferentes."
)
print(mensaje)


Distancia entre embeddings: 0.0309
🔒 Coincidencia: Mismo rostro detectado.


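El resultado es "Mismo rostro detectado" aunque las dos fotos son de personas diferentes: el modelo todavía no está entrenado (sus pesos son aleatorios), por lo que la distancia entre embeddings no es significativa. A continuación entrenaremos una CNN.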

In [23]:
# Augmentation pipeline: random flip, rotation, crop and color jitter, then
# conversion to a tensor (converted back to a PIL image right before saving).
augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(100, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor()
])

output_folder = "dataset_augmented"
os.makedirs(output_folder, exist_ok=True)

# Built once and reused instead of being re-created for every saved image.
to_pil = transforms.ToPILImage()

# "dataset" holds one sub-folder per person; mirror that layout in the output.
for person_name in os.listdir("dataset"):
    person_path = os.path.join("dataset", person_name)
    if not os.path.isdir(person_path):
        continue

    output_person_path = os.path.join(output_folder, person_name)
    os.makedirs(output_person_path, exist_ok=True)

    for file in os.listdir(person_path):
        if not file.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue

        # splitext removes the real extension whatever its length; slicing
        # with file[:-4] left a trailing dot on ".jpeg" file names.
        stem = os.path.splitext(file)[0]
        image_path = os.path.join(person_path, file)

        # The context manager closes the file handle. convert("RGB") returns
        # a loaded copy and drops alpha / palette modes, which cannot be
        # written as JPEG.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Write 10 random variants of every source photo.
        for i in range(10):
            augmented = augmentation(image)
            save_path = os.path.join(output_person_path, f"{stem}_aug_{i}.jpg")
            to_pil(augmented).save(save_path)


In [24]:
# Load the augmented images (one class per sub-folder) with the RGB transform
dataset = datasets.ImageFolder("dataset_augmented", transform=transform_rgb)

# 80 / 20 train / validation split.
# A seeded generator keeps the partition identical across kernel restarts;
# without it every run trains and validates on different images.
# NOTE(review): the split is done per augmented image, so variants of the same
# source photo can land in both subsets and make validation look optimistic.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=16)


In [25]:
class CNNClasificador(nn.Module):
    """CNN classifier for 3-channel images producing ``num_classes`` logits.

    The feature extractor is three conv(3x3, padding=1) -> ReLU -> max-pool(2)
    stages (3 -> 16 -> 32 -> 64 channels). The dense part is sized for
    64 * 12 * 12 features, i.e. a 96x96 input.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Build the conv stages from their (in_channels, out_channels) pairs.
        etapas = []
        for c_in, c_out in ((3, 16), (16, 32), (32, 64)):
            etapas.extend(
                (nn.Conv2d(c_in, c_out, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2))
            )
        self.features = nn.Sequential(*etapas)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 12 * 12, 128)
        self.relu = nn.ReLU()
        self.output = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        oculto = self.relu(self.fc1(self.flatten(self.features(x))))
        return self.output(oculto)


In [26]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# One output unit per class folder found by ImageFolder
num_classes = len(dataset.classes)
model = CNNClasificador(num_classes).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    batch_losses = []

    for batch_imgs, batch_labels in train_loader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear gradients, forward, backward, update weights
        optimizer.zero_grad()
        batch_loss = loss_fn(model(batch_imgs), batch_labels)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    # Report the mean of the per-batch losses for this epoch
    total_loss = sum(batch_losses)
    print(f"🎓 Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


🎓 Epoch 1, Loss: 3.5295
🎓 Epoch 2, Loss: 3.4425
🎓 Epoch 3, Loss: 3.0034
🎓 Epoch 4, Loss: 2.0446
🎓 Epoch 5, Loss: 1.2016
🎓 Epoch 6, Loss: 0.6119
🎓 Epoch 7, Loss: 0.4015
🎓 Epoch 8, Loss: 0.1883
🎓 Epoch 9, Loss: 0.1360
🎓 Epoch 10, Loss: 0.1219


In [29]:
class ExtractorEmbeddings(nn.Module):
    """Reuse a trained classifier's layers up to ``relu`` as an embedding model.

    The sub-modules are shared with ``modelo_entrenado`` (not copied), and the
    classifier's final output layer is left out.

    Args:
        modelo_entrenado: Module exposing ``features``, ``flatten``, ``fc1``
            and ``relu`` attributes.
    """

    # Sub-modules taken from the trained model, in the order they are applied.
    _ETAPAS = ("features", "flatten", "fc1", "relu")

    def __init__(self, modelo_entrenado):
        super().__init__()
        for nombre in self._ETAPAS:
            setattr(self, nombre, getattr(modelo_entrenado, nombre))

    def forward(self, x):
        """Return the activations after ``relu`` for the batch ``x``."""
        for nombre in self._ETAPAS:
            x = getattr(self, nombre)(x)
        return x

# Wrap the trained classifier as an embedding extractor, in evaluation mode
# (.to() and .eval() both return the module, so the calls can be chained).
extractor = ExtractorEmbeddings(model).to(device).eval()

# Load the two photos to compare, forcing 3-channel RGB
img1, img2 = [
    Image.open(ruta).convert("RGB")
    for ruta in ("rostros/AdrianCisneros.jpg", "rostros/JuanCarranza.jpg")
]

# Preprocess, add a batch dimension and move to the model's device
img1_tensor, img2_tensor = [
    transform_rgb(foto).unsqueeze(0).to(device) for foto in (img1, img2)
]

with torch.no_grad():
    emb1, emb2 = extractor(img1_tensor), extractor(img2_tensor)

# Cosine similarity between the two embeddings
from torch.nn.functional import cosine_similarity

sim = cosine_similarity(emb1, emb2).item()
print(f"📏 Similitud coseno: {sim:.4f}")
# Similarities above 0.85 are reported as the same face
print(
    "🔒 Coincidencia: Mismo rostro detectado."
    if sim > 0.85
    else "❌ No coinciden: Rostros diferentes."
)


📏 Similitud coseno: 0.6726
❌ No coinciden: Rostros diferentes.
