<a href="https://colab.research.google.com/github/jordanmsouza/TechChallenge_Fase4_Grupo4/blob/main/Tech_Callenge_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Montando o Google Drive

In [1]:
# Mount Google Drive at /content/drive so the cells below can read the input
# video from it and write their results to it (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Instalando dependências

In [2]:
!pip install opencv-python
!pip install numpy
!pip install deepface
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install mediapipe
!pip install tqdm

Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1


### Importando as dependências

In [2]:
import cv2
import numpy as np
from deepface import DeepFace
import detectron2
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer, ColorMode
from collections import defaultdict
from tqdm import tqdm
import os
import mediapipe as mp

24-12-02 17:58:19 - Directory /root/.deepface has been created
24-12-02 17:58:19 - Directory /root/.deepface/weights has been created


### Configuração da pasta de saída

In [3]:
# Folder on the mounted Drive that receives the annotated video and the text summary.
output_dir = "/content/drive/MyDrive/Visao_computacional/"
# exist_ok=True makes the cell idempotent on re-run and avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(output_dir, exist_ok=True)

### Inicializar MediaPipe Pose

In [4]:
# Shortcut to MediaPipe's pose solution module; its PoseLandmark names are
# used by detect_activities to index the detected landmarks.
mp_pose = mp.solutions.pose
# Pose estimator created with default arguments; analyze_video calls
# pose.process(...) on every frame.
pose = mp_pose.Pose()

### Função para análise de emoções (ajustada para suavização)

In [5]:
# Per-emotion counters used to smooth frame-to-frame flicker in the detected emotion.
emotion_history = defaultdict(int)
emotion_persistence_threshold = 4  # Counter value an emotion must reach to be confirmed
last_confirmed_emotion = None  # Most recently confirmed emotion


def analyze_face_emotions(frame, boxes, last_emotion):
    """Return the smoothed dominant emotion for the given regions of a frame.

    Each box is cropped from ``frame`` and passed to ``DeepFace.analyze``. The
    dominant emotion of every crop increments a module-level counter; an
    emotion is only confirmed once its counter reaches
    ``emotion_persistence_threshold``, at which point all other counters are
    reset.

    Args:
        frame: Image array indexed as ``frame[y, x]``.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes (values are truncated to int).
        last_emotion: Emotion returned for the previous frame, or ``None``.

    Returns:
        The newly confirmed emotion if it changed, otherwise the last
        confirmed emotion (``None`` until the first confirmation).
    """
    global last_confirmed_emotion

    for box in boxes:
        try:
            x1, y1, x2, y2 = map(int, box)
            # Clamp to the image origin: negative indices would silently wrap
            # around and crop the wrong region.
            x1, y1, x2, y2 = max(0, x1), max(0, y1), max(0, x2), max(0, y2)
            roi = frame[y1:y2, x1:x2]  # Region of interest for this box
            if roi.size == 0:
                # Degenerate box: nothing to analyze.
                continue
            analysis = DeepFace.analyze(roi, actions=['emotion'], enforce_detection=False)
            emotions = analysis[0]["emotion"]
        except Exception:
            # Best effort: a failure on one box must not prevent the remaining
            # boxes from being analyzed, so skip only this box.
            continue

        if not emotions:
            continue
        dominant_emotion = max(emotions, key=emotions.get)

        # Update the history: reinforce the detected emotion and decay the
        # previous one when they differ. Skip the decay while there is no
        # previous emotion so that no ``None`` key is inserted.
        emotion_history[dominant_emotion] += 1
        if last_emotion is not None and dominant_emotion != last_emotion:
            emotion_history[last_emotion] = max(0, emotion_history[last_emotion] - 1)

        # Confirm the emotion once it has persisted long enough.
        if emotion_history[dominant_emotion] >= emotion_persistence_threshold:
            # Reset the counters of every other emotion.
            for key in list(emotion_history.keys()):
                if key != dominant_emotion:
                    emotion_history[key] = 0

            # Report it immediately only when it differs from the last confirmed one.
            if dominant_emotion != last_confirmed_emotion:
                last_confirmed_emotion = dominant_emotion
                return dominant_emotion

    return last_confirmed_emotion

### Função para inicializar o modelo Detectron2

In [6]:
def setup_detectron2(config_name="COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml",
                     score_thresh=0.5,
                     device=None):
    """Build a Detectron2 ``DefaultPredictor`` from a model-zoo configuration.

    Called without arguments it behaves as before: Faster R-CNN R50-FPN 3x
    with a test-time score threshold of 0.5.

    Args:
        config_name: Model-zoo config path, used for both the config file and
            the pretrained checkpoint URL.
        score_thresh: Value assigned to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.
        device: Value for ``MODEL.DEVICE`` (e.g. ``"cuda"`` or ``"cpu"``).
            When ``None``, CUDA is used if available, otherwise the CPU, so
            the notebook also runs on a runtime without a GPU.

    Returns:
        A ``DefaultPredictor`` built from the resulting config.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
    if device is None:
        # Local import: torch is not among this notebook's top-level imports.
        import torch
        device = "cuda" if torch.cuda.is_available() else "cpu"
    cfg.MODEL.DEVICE = device
    return DefaultPredictor(cfg)

### Função para detecção de atividades específicas

In [7]:
# Last known keypoint positions, kept across calls to measure frame-to-frame movement.
previous_positions = {}


def detect_activities(frame, pose_results):
    """Label the activities suggested by the pose landmarks of one frame.

    Uses wrist, shoulder, elbow and nose landmarks (coordinates are compared
    against fixed thresholds) together with the positions remembered in
    ``previous_positions`` from earlier calls.

    Args:
        frame: Not used by this function.
        pose_results: Object whose ``pose_landmarks.landmark`` is indexable by
            ``mp_pose.PoseLandmark`` members.

    Returns:
        A list of activity labels; empty when no landmarks were found, and
        ``["Activity not detected"]`` when landmarks exist but no rule fired.
    """
    detected = []
    if not pose_results.pose_landmarks:
        return detected

    lm = pose_results.pose_landmarks.landmark
    joint = mp_pose.PoseLandmark
    wrist_l, wrist_r = lm[joint.LEFT_WRIST], lm[joint.RIGHT_WRIST]
    shoulder_l, shoulder_r = lm[joint.LEFT_SHOULDER], lm[joint.RIGHT_SHOULDER]
    nose_pt = lm[joint.NOSE]

    # Raised hand: wrist above the shoulder and roughly aligned with it horizontally.
    raised_rules = (
        ("Left hand raised", wrist_l, shoulder_l),
        ("Right hand raised", wrist_r, shoulder_r),
    )
    for label, wrist, shoulder in raised_rules:
        if wrist.y < shoulder.y and abs(wrist.x - shoulder.x) < 0.2:
            detected.append(label)

    # Waving: wrist near shoulder height and far from the elbow horizontally.
    waving_rules = (
        ("Waving with the left hand", wrist_l, shoulder_l, joint.LEFT_ELBOW),
        ("Waving with the right hand", wrist_r, shoulder_r, joint.RIGHT_ELBOW),
    )
    for label, wrist, shoulder, elbow_id in waving_rules:
        if abs(wrist.y - shoulder.y) < 0.2 and abs(wrist.x - lm[elbow_id].x) > 0.3:
            detected.append(label)

    # Handshake: both wrists close together and at least one of them moved
    # since the last time the wrists were this close.
    if abs(wrist_l.x - wrist_r.x) < 0.2 and abs(wrist_l.y - wrist_r.y) < 0.2:
        hand_moves = []
        for key, wrist in (("left_hand", wrist_l), ("right_hand", wrist_r)):
            earlier = previous_positions.get(key, (None, None))
            if earlier == (None, None):
                hand_moves.append(0)
            else:
                hand_moves.append(abs(wrist.x - earlier[0]) + abs(wrist.y - earlier[1]))
        if any(step > 0.1 for step in hand_moves):
            detected.append("Handshake")
        previous_positions["left_hand"] = (wrist_l.x, wrist_l.y)
        previous_positions["right_hand"] = (wrist_r.x, wrist_r.y)

    # Dancing: large combined displacement of the visible tracked keypoints.
    motion = 0
    tracked = (nose_pt, shoulder_l, shoulder_r, wrist_l, wrist_r)
    for slot, pt in enumerate(tracked):
        if not pt.visibility > 0.5:
            continue
        earlier = previous_positions.get(slot, None)
        if earlier:
            motion += abs(pt.x - earlier[0]) + abs(pt.y - earlier[1])
        previous_positions[slot] = (pt.x, pt.y)

    if motion > 0.3:
        detected.append("Dancing")

    if not detected:
        detected.append("Activity not detected")

    return detected

### Função para processar o vídeo

In [8]:
def analyze_video(video_path):
    """Annotate a video with detections and write a per-label summary.

    For every frame the function detects people (Detectron2, class index 0),
    frontal faces (Haar cascade), activities (MediaPipe pose +
    ``detect_activities``) and the smoothed emotion
    (``analyze_face_emotions``). All detectors run on the untouched frame;
    boxes and labels are drawn on a separate copy, which is what gets written
    to ``output_video_1.mp4`` inside ``output_dir``. A text summary with the
    number of frames per emotion/activity is written to
    ``video_analysis_summary_1.txt`` in the same folder.

    Args:
        video_path: Path of the input video.

    Raises:
        IOError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    face_emotions_count = defaultdict(int)
    activity_count = defaultdict(int)
    last_emotion = None
    out = None

    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        output_video_path = os.path.join(output_dir, "output_video_1.mp4")
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        predictor = setup_detectron2()
        # Loop-invariant: load the face cascade once instead of once per frame.
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

        # The frame count reported by the container can be 0 or inaccurate, so
        # it only sizes the progress bar; the loop itself runs until read() fails.
        progress_total = frame_count if frame_count > 0 else None
        with tqdm(total=progress_total, desc="Analisando vídeo") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Draw on a copy so overlays never leak into the detectors' input.
                annotated = frame.copy()

                outputs = predictor(frame)
                instances = outputs["instances"].to("cpu")
                classes = instances.pred_classes.numpy()
                boxes = instances.pred_boxes.tensor.numpy()
                person_boxes = [box for i, box in enumerate(boxes) if classes[i] == 0]

                for box in person_boxes:
                    x1, y1, x2, y2 = map(int, box)
                    # Body bounding box in blue (BGR).
                    cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)

                # Separate frontal-face detection, used for drawing only.
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

                for (fx, fy, fw, fh) in faces:
                    # Face bounding box in green (BGR).
                    cv2.rectangle(annotated, (fx, fy), (fx + fw, fy + fh), (0, 255, 0), 2)

                pose_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                activities = detect_activities(frame, pose_results)
                for activity in activities:
                    activity_count[activity] += 1
                if activities:
                    cv2.putText(annotated, f"Activities: {', '.join(activities)}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

                # NOTE(review): emotions are analyzed on the person boxes, not on
                # the Haar face boxes detected above - confirm this is intended.
                last_emotion = analyze_face_emotions(frame, person_boxes, last_emotion)
                if last_emotion:
                    face_emotions_count[last_emotion] += 1
                    cv2.putText(annotated, f"Emotion: {last_emotion}", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                out.write(annotated)
                pbar.update(1)
    finally:
        # Always release the capture and the writer, even if a frame fails.
        cap.release()
        if out is not None:
            out.release()

    summary_path = os.path.join(output_dir, "video_analysis_summary_1.txt")
    # Explicit UTF-8: the summary contains accented characters.
    with open(summary_path, "w", encoding="utf-8") as f:
        f.write("Resumo de Análise de Vídeo\n\n")
        f.write("Emoções Detectadas:\n")
        for emotion, count in face_emotions_count.items():
            f.write(f"{emotion}: {count} ocorrências\n")
        f.write("\nAtividades Detectadas:\n")
        for activity, count in activity_count.items():
            f.write(f"{activity}: {count} ocorrências\n")

### Caminho para o vídeo

In [10]:
# Input video on the mounted Drive. analyze_video writes the annotated video
# and a text summary into output_dir.
video_path = "/content/drive/MyDrive/Visao_computacional/Unlocking_Facial_Recognition_Diverse Activities_Analysis.mp4"
analyze_video(video_path)

Analisando vídeo:   0%|          | 0/3326 [00:00<?, ?it/s]

24-12-02 18:01:04 - facial_expression_model_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/facial_expression_model_weights.h5
To: /root/.deepface/weights/facial_expression_model_weights.h5

100%|██████████| 5.98M/5.98M [00:00<00:00, 301MB/s]
Analisando vídeo: 100%|██████████| 3326/3326 [56:26<00:00,  1.02s/it]
