# Visión con Transformers + OpenCV
Sistema de cámara en vivo con detección de rostro (Haar Cascade) y modelos HF para **emociones** y **edad**.

In [None]:
# Put the root folder (the parent of the current working directory) on the
# module search path.
import os
import sys

_root_dir = os.path.abspath("..")
# Notebook cells get re-run; without this guard every run would append the
# same directory to sys.path again.
if _root_dir not in sys.path:
    sys.path.append(_root_dir)

In [2]:
# Este cuaderno requiere webcam disponible.
# Presiona 'q' para salir de la ventana.
import os, cv2, numpy as np, tempfile, torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

print("Cargando modelos (puede demorar la 1ra vez si hay descarga/caché)...")
processor_emotion = AutoImageProcessor.from_pretrained("dima806/facial_emotions_image_detection")
model_emotion = AutoModelForImageClassification.from_pretrained("dima806/facial_emotions_image_detection")
processor_age = AutoImageProcessor.from_pretrained("Robys01/facial_age_estimator")
model_age = AutoModelForImageClassification.from_pretrained("Robys01/facial_age_estimator")

# English class name -> Spanish text drawn on the frame.
emotion_labels = {
    "sad": "Triste","disgust": "Disgusto","angry": "Enojado",
    "neutral": "Neutral","fear": "Miedo","surprise": "Sorpresa","happy": "Feliz"
}
# Hand-written class-id -> age-range table, kept only as a fallback for when
# the model config carries no label for an id.
# NOTE(review): ids 8, 9 and 10 all map to '26-30' and id 7 to '13-24', which
# looks like a transcription slip -- confirm against the model card.
age_labels = {0:'01',1:'02',2:'03',3:'04-05',4:'06-07',5:'08-09',6:'10-12',7:'13-24',8:'26-30',9:'26-30',
              10:'26-30',11:'31-35',12:'36-40',13:'41-45',14:'46-50',15:'51-55',16:'56-60',17:'61-70',
              18:'71-80',19:'81-90',20:'90+'}


def _predict_label(processor, model, image):
    """Classify *image* and return ``(class_id, label)`` for the top class.

    ``label`` is read from ``model.config.id2label`` so the name always
    matches the checkpoint's own class ordering; it is ``None`` when the
    config has no entry for ``class_id``.
    """
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    class_id = int(logits.argmax(-1).item())
    return class_id, model.config.id2label.get(class_id)


# Haar cascade: load it from a copy in the temp directory.
# NOTE(review): presumably the copy works around OpenCV failing to open the
# packaged path on some installs -- confirm.
src_path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
dest_path = os.path.join(tempfile.gettempdir(), 'haarcascade_frontalface_default.xml')
if not os.path.exists(dest_path):
    with open(src_path, 'rb') as s, open(dest_path, 'wb') as d:
        d.write(s.read())
face_cascade = cv2.CascadeClassifier(dest_path)
if face_cascade.empty():
    # Fail here with a clear message instead of inside detectMultiScale.
    raise RuntimeError(f"No se pudo cargar el clasificador Haar: {dest_path}")

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("No se pudo abrir la cámara (índice 0).")
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
        for (x, y, w, h) in faces:
            face_img = frame[y:y+h, x:x+w]
            # OpenCV frames are BGR; the image processors are fed an RGB PIL image.
            face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))

            # Emotion: translate the model's own class name, falling back to
            # the untranslated name if it is missing from emotion_labels.
            _, em_name = _predict_label(processor_emotion, model_emotion, face_pil)
            emotion_text = emotion_labels.get(em_name, em_name or '?')

            # Age: prefer the checkpoint's label; use the local table only
            # when the config has none for this id.
            ag_id, ag_name = _predict_label(processor_age, model_age, face_pil)
            age_text = ag_name or age_labels.get(ag_id, '?')

            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            # Keep both text lines inside the frame when the face touches the
            # top edge; for y >= 50 the positions are unchanged.
            label_y = max(y, 50)
            cv2.putText(frame, f'Emocion: {emotion_text}', (x, label_y-30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(frame, f'Edad: {age_text}', (x, label_y-8),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        cv2.imshow("Emociones y Edad (q para salir)", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the webcam and close the window, even if inference raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

Cargando modelos (puede demorar la 1ra vez si hay descarga/caché)...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
