# In [1]:
import torch
import torch.nn.functional as F
from torch.nn.modules.conv import Conv2d
from ultralytics import YOLO
import cv2
import random
import numpy as np
from PIL import Image

# Workaround for Conv2d: the stock forward raised a compatibility error with
# the face-detection model, so it is replaced by the function below.
def _patched_conv_forward(self, input, weight, bias):
    """Run the 2-D convolution for a ``Conv2d`` module with plain-int dilation.

    Args:
        self: The ``Conv2d`` instance whose hyper-parameters are read.
        input: Input tensor handed to ``F.conv2d``.
        weight: Convolution kernel tensor.
        bias: Bias tensor, or ``None``.

    Returns:
        The tensor produced by ``F.conv2d``.
    """
    # Coerce every dilation entry to a built-in int, whether the module
    # stores a tuple or a single value.
    raw_dilation = self.dilation
    dilation = (
        tuple(int(d) for d in raw_dilation)
        if isinstance(raw_dilation, tuple)
        else int(raw_dilation)
    )

    # Common case: zero padding is handled by conv2d itself.
    if self.padding_mode == "zeros":
        return F.conv2d(
            input, weight, bias, self.stride, self.padding, dilation, self.groups
        )

    # Any other padding mode is applied explicitly first, then the
    # convolution runs without additional padding.
    padded = F.pad(
        input, self._reversed_padding_repeated_twice, mode=self.padding_mode
    )
    return F.conv2d(padded, weight, bias, self.stride, (0, 0), dilation, self.groups)


# Install the replacement on the class so every Conv2d instance uses it.
Conv2d._conv_forward = _patched_conv_forward

# MODELS

# Torch device selection: CUDA when available, otherwise CPU.
# NOTE(review): `device` is not referenced again in the visible code —
# confirm whether the models were meant to be moved to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Two YOLO models loaded from local weight files: one is run on whole frames
# to find faces, the other on each face crop to classify the emotion.
face_model = YOLO("weights/yolov8-lite-t.pt")
emotion_model = YOLO("weights/best.pt")

# Display labels (Spanish), indexed by the class id returned by emotion_model.
# NOTE(review): the order presumably mirrors the class order of the emotion
# weights — verify against the training configuration.
EMOTION_LABELS = [
    'Enfadado',
    'Contento',
    'Disgustado',
    'Asustado',
    'Feliz',
    'Neutral',
    'Triste',
    'Sorprendido'
]

# MÉTODOS PARA CARGA Y SUPERPOSICIÓN DE LAS IMÁGENES

def preescale_png(path, fixed_width):
    """Load an image and resize it to a fixed width, keeping its aspect ratio.

    Args:
        path: File path handed to ``cv2.imread``.
        fixed_width: Target width in pixels; must be positive.

    Returns:
        The resized image array. The file is read with
        ``cv2.IMREAD_UNCHANGED``, so the channels are kept as stored.

    Raises:
        ValueError: If ``fixed_width`` is not positive.
        FileNotFoundError: If ``cv2.imread`` could not load ``path``.
    """
    # Fail early with a clear message instead of an opaque cv2.resize error.
    if fixed_width <= 0:
        raise ValueError(f"fixed_width debe ser positivo: {fixed_width}")

    png = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if png is None:
        raise FileNotFoundError(f"No se pudo cargar el PNG: {path}")

    orig_h, orig_w = png.shape[:2]
    scale = fixed_width / orig_w
    # A very wide image can truncate to a height of 0, which cv2.resize
    # rejects; keep at least one row.
    new_h = max(1, int(orig_h * scale))

    return cv2.resize(png, (fixed_width, new_h), interpolation=cv2.INTER_AREA)

def gif_loader(path):
    """Decode every frame of an animated image into BGRA arrays.

    Args:
        path: Path of the image file opened with ``PIL.Image.open``.

    Returns:
        A ``(frames, frame_idx)`` tuple: ``frames`` is the list of decoded
        frames (each converted to RGBA and then reordered to BGRA), and
        ``frame_idx`` is the initial playback index, always ``0``.
    """
    gif_frames = []
    # The context manager closes the underlying file once decoding finishes,
    # including when an unexpected error interrupts it.
    with Image.open(path) as gif:
        try:
            while True:
                frame_np = np.array(gif.convert("RGBA"))
                gif_frames.append(cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA))
                gif.seek(gif.tell() + 1)
        except EOFError:
            # Seeking past the last frame is how the end of the sequence
            # is signalled; nothing else to do.
            pass
    frame_idx = 0
    return gif_frames, frame_idx

def overlay_rgba(base_img, rgba_img, x, y, scale=1.0):
    """Alpha-blend ``rgba_img`` onto ``base_img`` in place.

    Args:
        base_img: Destination image, modified in place.
        rgba_img: Overlay image. When it has four channels the fourth is used
            as the alpha mask; otherwise it is treated as fully opaque.
        x: Left coordinate of the overlay in ``base_img``.
        y: Top coordinate of the overlay in ``base_img``.
        scale: Factor applied to the overlay size before blending.

    Returns:
        None. Nothing is drawn when the scaled overlay is empty or starts
        beyond the right/bottom edge of ``base_img``.

    Note:
        Negative ``x``/``y`` are clamped to 0, so the overlay is shifted into
        the frame rather than cropped on its top/left side. Parts extending
        past the right/bottom edge are cropped.
    """
    h_png, w_png = rgba_img.shape[:2]
    new_w = int(w_png * scale)
    new_h = int(h_png * scale)
    if new_w <= 0 or new_h <= 0:
        return

    H, W = base_img.shape[:2]

    x1 = max(0, x)
    y1 = max(0, y)
    # Checked before resizing so off-frame overlays cost nothing.
    if x1 >= W or y1 >= H:
        return
    x2 = min(W, x1 + new_w)
    y2 = min(H, y1 + new_h)

    # Skip the resize (and its copy) when the size does not change.
    if (new_w, new_h) == (w_png, h_png):
        resized = rgba_img
    else:
        resized = cv2.resize(rgba_img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Keep only the part of the overlay that fits inside the frame.
    visible = resized[0:y2 - y1, 0:x2 - x1]

    if visible.shape[2] == 4:
        bgr = visible[:, :, :3]
        alpha_3 = visible[:, :, 3:4].astype(float) / 255.0  # (h, w, 1)
    else:
        bgr = visible
        alpha_3 = np.ones((y2 - y1, x2 - x1, 1), dtype=float)

    roi = base_img[y1:y2, x1:x2]
    base_img[y1:y2, x1:x2] = (bgr * alpha_3 + roi * (1.0 - alpha_3)).astype(np.uint8)


def overlay_png(base_img, png_img, x, y, scale=1.0):
    """Blend ``png_img`` onto ``base_img`` in place, honouring its alpha channel.

    Args:
        base_img: Destination image, modified in place.
        png_img: Overlay image with at least three channels. A fourth
            channel, when present, is used as the alpha mask; a
            three-channel image is pasted fully opaque.
        x: Left coordinate of the overlay in ``base_img``.
        y: Top coordinate of the overlay in ``base_img``.
        scale: Factor applied to the overlay size before blending.

    Returns:
        None. Nothing is drawn when the scaled overlay is empty or starts
        beyond the right/bottom edge of ``base_img``.

    Note:
        Negative ``x``/``y`` are clamped to 0, so the overlay is moved into
        the frame instead of being cropped on its top/left side. Parts
        extending past the right/bottom edge are cropped.
    """
    h_png, w_png = png_img.shape[:2]
    new_w = int(w_png * scale)
    new_h = int(h_png * scale)
    if new_w <= 0 or new_h <= 0:
        return

    H, W = base_img.shape[:2]

    x1 = max(0, x)
    y1 = max(0, y)
    # Checked before resizing so off-frame overlays cost nothing.
    if x1 >= W or y1 >= H:
        return
    x2 = min(W, x1 + new_w)
    y2 = min(H, y1 + new_h)

    # Skip the resize (and its copy) when the size does not change, which is
    # the case for every call that uses the default scale.
    if (new_w, new_h) == (w_png, h_png):
        png_resized = png_img
    else:
        png_resized = cv2.resize(png_img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Keep only the part of the overlay that fits inside the frame.
    visible = png_resized[0: y2 - y1, 0: x2 - x1]
    bgr = visible[:, :, :3]

    # No alpha channel: paste the pixels as fully opaque instead of failing
    # on the missing fourth channel.
    if visible.shape[2] < 4:
        base_img[y1:y2, x1:x2] = bgr
        return

    alpha = visible[:, :, 3:4] / 255.0  # (h, w, 1), values in 0–1
    roi = base_img[y1:y2, x1:x2]

    # One broadcast expression blends all three colour channels at once.
    base_img[y1:y2, x1:x2] = (bgr * alpha + roi * (1 - alpha)).astype(np.uint8)

# FUNCIONES DE FILTRO

def draw_png(img, x1, y1, x2, y2, png_fixed):
    """Draw ``png_fixed`` horizontally centred on the face box, above its top edge.

    Args:
        img: Frame to draw on; passed to ``overlay_png``, which edits it in place.
        x1, y1, x2, y2: Face bounding box corners. ``y2`` is accepted for a
            uniform filter signature but is not used here.
        png_fixed: Pre-scaled sticker image.
    """
    sticker_h, sticker_w = png_fixed.shape[:2]

    # Vertical gap, in pixels, between the sticker's bottom and the box top.
    gap = 10

    box_center_x = x1 + (x2 - x1) // 2
    left = box_center_x - sticker_w // 2
    top = y1 - sticker_h - gap

    overlay_png(img, png_fixed, left, top)

def draw_rain(img, x1, y1, x2, y2, n_drops=40, color=(255, 255, 255)):
    """Draw random vertical streaks ("rain") around a face box, in place.

    The streaks are placed inside the box padded by 20 px horizontally and
    40 px vertically, clipped to the image bounds.

    Args:
        img: Image to draw on (modified in place by ``cv2.line``).
        x1, y1, x2, y2: Face bounding box corners.
        n_drops: Number of streaks to draw.
        color: Line colour passed to ``cv2.line``.

    Returns:
        None. Nothing is drawn when the padded region does not intersect
        the image.
    """
    height, width = img.shape[:2]
    rx1 = max(0, x1 - 20)
    rx2 = min(width - 1, x2 + 20)
    ry1 = max(0, y1 - 40)
    ry2 = min(height - 1, y2 + 40)

    # An empty region (box outside the frame, or an empty image) would make
    # random.randint raise ValueError; there is simply nothing to draw.
    if rx1 > rx2 or ry1 > ry2:
        return

    # Leave 10 px of room below the start point when the region allows it,
    # but never let the upper bound fall below the lower one.
    y_start_max = max(ry1, ry2 - 10)

    for _ in range(n_drops):
        x = random.randint(rx1, rx2)
        y_start = random.randint(ry1, y_start_max)
        y_end = min(height - 1, y_start + random.randint(10, 25))
        cv2.line(img, (x, y_start), (x, y_end), color, 1)

def draw_gif(img, x1, y1, x2, y2, frames, frame_idx, speed=1, scale_div=2.2, scale_min=0.3, scale_max=1.2, y_offset_factor=0.1):
    """Overlay the current animation frame below a face box and advance the index.

    Args:
        img: Frame to draw on; passed to ``overlay_rgba``, which edits it in place.
        x1, y1, x2, y2: Face bounding box corners.
        frames: Sequence of animation frames.
        frame_idx: Running playback index; wrapped modulo ``len(frames)``.
        speed: Amount added to ``frame_idx`` per call.
        scale_div: Divisor relating the face width to the frame width.
        scale_min: Lower bound for the resulting scale factor.
        scale_max: Upper bound for the resulting scale factor.
        y_offset_factor: Fraction of the scaled height by which the
            animation is raised above the bottom edge of the box.

    Returns:
        ``frame_idx + speed`` after drawing, or ``frame_idx`` unchanged when
        there are no frames or the box has no positive width.
    """
    # Nothing to draw: leave the playback position untouched.
    if not frames or x2 - x1 <= 0:
        return frame_idx

    box_w = x2 - x1
    first_h, first_w = frames[0].shape[:2]

    # Size the animation relative to the face, within the allowed bounds.
    capped = min(box_w / (scale_div * first_w), scale_max)
    factor = max(scale_min, capped)

    scaled_w = int(first_w * factor)
    scaled_h = int(first_h * factor)

    # Centre horizontally on the box; anchor near its bottom edge.
    left = x1 + box_w // 2 - scaled_w // 2
    top = y2 - int(scaled_h * y_offset_factor)

    current = frames[frame_idx % len(frames)]
    overlay_rgba(img, current, left, top, scale=factor)

    return frame_idx + speed

# IMAGE / GIF LOADING AND WEBCAM LOOP

# Assets are loaded before the camera is opened so that a missing file cannot
# leave the capture device open.
FIXED_WIDTH = 150
balloon_png = preescale_png("images/balloons.png", FIXED_WIDTH)
gifts_png = preescale_png("images/gifts.png", FIXED_WIDTH)
vomit_png = preescale_png("images/vomit.png", FIXED_WIDTH)
ellipsis_png = preescale_png("images/ellipsis.png", FIXED_WIDTH)
contempt_png = preescale_png("images/contempt.png", FIXED_WIDTH)

fire_gif_frames, fire_frame_idx = gif_loader("images/fire.gif")

spiders_gif_frames, spiders_frame_idx = gif_loader("images/spiders.gif")

# Emotions whose filter is a static sticker drawn above the face.
STICKER_BY_EMOTION = {
    'Feliz': balloon_png,
    'Sorprendido': gifts_png,
    'Disgustado': vomit_png,
    'Neutral': ellipsis_png,
    'Contento': contempt_png,
}

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # The loop below ends on the first failed read; say why.
        print("No se pudo abrir la cámara")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Face detection on the full frame
        face_results = face_model(frame, conf=0.3, verbose=False)

        annotated = frame.copy()
        boxes = face_results[0].boxes
        if boxes is None:
            boxes = []

        # The frame size is the same for every box, so read it once.
        h, w = frame.shape[:2]

        for box in boxes:
            # Face box coordinates, clamped to the frame
            x1, y1, x2, y2 = box.xyxy[0].int().tolist()
            x1 = max(0, min(x1, w - 1))
            x2 = max(0, min(x2, w - 1))
            y1 = max(0, min(y1, h - 1))
            y2 = max(0, min(y2, h - 1))
            if x2 <= x1 or y2 <= y1:
                continue

            face_crop = frame[y1:y2, x1:x2]
            if face_crop.size == 0:
                continue

            # Emotion detection on the face crop
            emo_results = emotion_model(face_crop, conf=0.25, verbose=False)
            emo_boxes = emo_results[0].boxes

            if emo_boxes is None or len(emo_boxes) == 0:
                continue

            # Keep the emotion detection with the highest confidence
            best_idx = int(emo_boxes.conf.argmax().item())
            cls_id = int(emo_boxes.cls[best_idx].item())

            if 0 <= cls_id < len(EMOTION_LABELS):
                emotion = EMOTION_LABELS[cls_id]
            else:
                emotion = "Unknown"

            cv2.putText(
                annotated,
                emotion,
                (x1, max(y1 - 10, 0)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 255, 0),
                2,
                cv2.LINE_AA,
            )

            # Apply the filter that matches the emotion
            sticker = STICKER_BY_EMOTION.get(emotion)
            if sticker is not None:
                draw_png(annotated, x1, y1, x2, y2, sticker)
            elif emotion == 'Triste':
                draw_rain(annotated, x1, y1, x2, y2)
            elif emotion == 'Enfadado':
                fire_frame_idx = draw_gif(annotated, x1, y1, x2, y2, frames=fire_gif_frames, frame_idx=fire_frame_idx)
            elif emotion == 'Asustado':
                spiders_frame_idx = draw_gif(annotated, x1, y1, x2, y2, frames=spiders_gif_frames, frame_idx=spiders_frame_idx, speed=4)

        cv2.imshow("Filtro de emociones", annotated)

        # Press "q" to quit
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if a model call or
    # a drawing helper raised.
    cap.release()
    cv2.destroyAllWindows()


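# NOTE(review): stray text after the cell, presumably captured notebook output rather than script code: import pkg_resources as pkg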
