In [2]:
import cv2
import numpy as np
import tensorflow as tf
import pandas as pd
import mediapipe as mp


In [3]:
# Diretório usado para guardar o modelo
model_dir = r'D:\USER DATA NAO APAGAR\Desktop\Unifesp\2024 - 1 - semestre\IA\projeto\modelos\MobileNetV2'
model_path = model_dir + '/best_MobileNetV2_model.keras'

# Diretório usado para guardar o video
video_dir = r'D:\USER DATA NAO APAGAR\Desktop\Unifesp\2024 - 1 - semestre\IA\projeto\videos_dos_jogos'
video_path = video_dir + '/trem.MOV'

# Carrega modelo treinado
model = tf.keras.models.load_model(model_path)



In [21]:
# Diretório usado para guardar o video
video_dir = r'D:\USER DATA NAO APAGAR\Desktop\Unifesp\2024 - 1 - semestre\IA\projeto\videos_dos_jogos'
video_path = video_dir + '/bombeiro.MOV'

In [6]:
# Inicializa módulo MediaPipe Hands 
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

In [7]:
# Carrega os rótulos do arquivo CSV
labels_df = pd.read_csv('Labels.csv')  # Assuming the CSV has a column 'label'
labels = labels_df['LABEL'].tolist()

In [8]:
# Função de pré-processamento de cada frame
def preprocess_frame(frame, target_size=(224, 224)):
    # Redimensiona o frame para a entrada esperada do modelo
    frame_resized = cv2.resize(frame, target_size)
    # Normaliza o frame
    frame_normalized = frame_resized / 255.0
    # Espande as dimensões para corresponder a entrada do modelo
    return np.expand_dims(frame_normalized, axis=0)

# Função para mostrar bounding box e classificar os resultados de cada frame
def display_frame(frame, prediction, confidence, hand_landmarks_list):
    # Adiciona texto no overlar para os resultados da classificação
    label_name = labels[prediction]
    label_text = f"Predicted: {label_name}, Confidence: {confidence:.2f}"
    cv2.putText(frame, label_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    
    # Bounding boxes
    h, w, _ = frame.shape
    for hand_landmarks in hand_landmarks_list:
        x_min, y_min = w, h
        x_max, y_max = 0, 0
        for lm in hand_landmarks.landmark:
            x, y = int(lm.x * w), int(lm.y * h)
            if x < x_min: x_min = x
            if y < y_min: y_min = y
            if x > x_max: x_max = x
            if y > y_max: y_max = y

        # Desenha as bounding boxes ao redor da mão
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
    
    return frame

In [22]:
# Abre arquivo
cap = cv2.VideoCapture(video_path)

# Verifica se foi aberto corretamente
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()
    
frame_count = 0

while True:
    ret, frame = cap.read()

    if not ret:
        break  # Sai do loop quando o video termina

    # Pré-processamento do frame
    processed_frame = preprocess_frame(frame)
    
    # Converte o frame para RGB
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    
    # Processa o frame e detecta as mãos
    results = hands.process(image)
    
    # Converte a imagem de volta para BGR para o Display
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    # Inicializa uma lista vazia para manter os landmarks das mãos
    hand_landmarks_list = []
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            hand_landmarks_list.append(hand_landmarks)
    
    # Preve o modelo
    predictions = model.predict(processed_frame)
    predicted_class = np.argmax(predictions)
    confidence = np.max(predictions)

    # Mostra os resultados e a bounding box em cada frame
    frame_with_prediction = display_frame(frame, predicted_class, confidence, hand_landmarks_list)

    # Mostra os frames
    cv2.imshow('Video', frame_with_prediction)
    
    # Delay para o botão
    key = cv2.waitKey(25) & 0xFF

    # Tirar print com o tecla 'p'
    if key == ord('p'):
        screenshot_name = f'screenshot_{frame_count}.png'
        cv2.imwrite(screenshot_name, frame)
        print(f'Screenshot saved as {screenshot_name}')
        frame_count += 1

    # Sair do loop quando pressionar 'q' 
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break
    
# Finaliza a captura de video e fecha a aba
cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40