# 1. Importando Dependencias

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import pickle

In [2]:
with open('config.pickle', 'rb') as handle:
    config = pickle.load(handle)
config

{'DATA_PATH': 'MP_Data',
 'actions': array(['oi', 'eu_amo_voce'], dtype='<U11'),
 'no_sequences': 30,
 'sequence_length': 30}

## 1.1 - Funções importantes para detecção

In [3]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utils

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    image.flags.writeable = False                 
    results = model.process(image)                
    image.flags.writeable = True                  
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [5]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS) # Desenha conexões da face
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Desenha conexões da pose
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Desenha conexões da mão esquerda
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Desenha conexões da mão direita

In [6]:
def draw_styled_landmarks(image, results):
    # Desenha conexões da face
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Desenha conexões da pose
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Desenha conexões da mão esquerda
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Desenha conexões da mão direita
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [7]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# 2. Pre-processando os dados e separando em treino/teste

In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [9]:
label_map = {label:num for num, label in enumerate(config['actions'])}
label_map

{'oi': 0, 'eu_amo_voce': 1}

In [10]:
sequences, labels = [], []
for action in config['actions']:
    for sequence in np.array(os.listdir(os.path.join(config['DATA_PATH'], action))).astype(int):
        window = []
        for frame_num in range(config['sequence_length']):
            res = np.load(os.path.join(config['DATA_PATH'], action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [11]:
X = np.array(sequences)
X.shape

(60, 30, 1662)

In [12]:
y = to_categorical(labels).astype(int)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [14]:
# temos 3 dados separados para teste
y_test.shape

(3, 2)

# 3. Criando e Treinando a LSTM Neural Network

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [16]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(config['actions'].shape[0], activation='softmax'))

## Caso queira criar um modelo novo

In [None]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [68]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [14]:
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

In [36]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_10 (LSTM)              (None, 30, 128)           98816     
                                                                 
 lstm_11 (LSTM)              (None, 64)                49408     
                                                                 
 dense_8 (Dense)             (None, 64)                4160      
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 dense_10 (Dense)            (None, 2)                 66        
                                                                 
Total params: 596642 (2.28 MB)
Trainable params: 59664

In [12]:
model.save('model.h5')



  saving_api.save_model(


## Caso queira carregar um modelo já treinado

In [17]:
model_path = 'model.h5'
model.load_weights(model_path)

# 4. Testando em tempo real

In [18]:
from scipy import stats

In [19]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [20]:
# Variáveis de detecção
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Leitura da camera
        ret, frame = cap.read()

        # Faz as conexões
        image, results = mediapipe_detection(frame, holistic)
        
        # Desenha landmarks
        draw_styled_landmarks(image, results)
        
        # Lógica para predição
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(config['actions'][np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if config['actions'][np.argmax(res)] != sentence[-1]:
                            sentence.append(config['actions'][np.argmax(res)])
                    else:
                        sentence.append(config['actions'][np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, config['actions'], image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Mostra na tela
        cv2.imshow('OpenCV Feed', image)

        # Pressione q para interromper
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi


oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
