In [29]:
import os
import cv2
import numpy as np
import mediapipe as mp
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [30]:
mp_hands = mp.solutions.hands

# Shared MediaPipe Hands detector used by process_frame.
# static_image_mode=True runs hand detection on every image. load_data feeds
# this detector the first frame of many *different* videos, so the frames are
# unrelated; in video-stream mode (static_image_mode=False) the tracker would
# try to follow the hands of the previous video into the next one.
# NOTE(review): if another cell runs this on a continuous video stream, a
# separate Hands(static_image_mode=False) instance would be the faster choice.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,  # ignored by MediaPipe while static_image_mode is True
)

def process_frame(frame):
    """Detect two hands in a BGR frame and return their landmarks in pixels.

    Parameters
    ----------
    frame : array
        BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before being
        handed to MediaPipe). The array is modified in place: when exactly
        two hands are detected their skeletons are drawn onto it.

    Returns
    -------
    frame : array
        The same array that was passed in (possibly with landmarks drawn).
    landmarks : list of [int, int, int]
        One ``[landmark_index, x_pixel, y_pixel]`` entry per landmark of each
        detected hand when exactly two hands are found. In every other case
        (no hand, or a single hand) a placeholder of 42 ``[0, 0, 0]`` entries
        is returned so that the result always has the same length
        (42 = 2 hands x 21 landmarks, the count MediaPipe Hands reports).
    """
    hands_required = 2
    points_per_hand = 21

    # MediaPipe expects RGB input, OpenCV delivers BGR.
    rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_image)

    detected = results.multi_hand_landmarks
    if detected is None or len(detected) != hands_required:
        # Fixed-size placeholder keeps every sample the same shape.
        return frame, [[0, 0, 0] for _ in range(hands_required * points_per_hand)]

    # The frame size is constant for the whole call, so read it once instead
    # of once per landmark.
    height, width = frame.shape[:2]

    landmarks = []
    for hand_landmarks in detected:
        # Draw the hand skeleton onto the caller's frame.
        mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        # Landmark coordinates are normalised; scale them to pixel positions.
        for index, lm in enumerate(hand_landmarks.landmark):
            landmarks.append([index, int(lm.x * width), int(lm.y * height)])

    return frame, landmarks

In [31]:
def load_data(data_dir):
    """Build a training set from the first frame of every ``.mp4`` under ``data_dir``.

    ``data_dir`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Entries of ``data_dir`` that are
    not directories, and files that do not end in ``.mp4``, are skipped.

    Parameters
    ----------
    data_dir : str
        Root directory of the dataset.

    Returns
    -------
    X : numpy.ndarray
        One 64x64 grayscale image per readable video.
    y : numpy.ndarray
        The label (sub-directory name) of each image, in the same order as ``X``.
    all_landmarks : list of (landmarks, label)
        The landmark list returned by ``process_frame`` for each image,
        paired with its label.
    """
    X = []
    y = []
    all_landmarks = []

    # sorted() makes the sample order reproducible; os.listdir order is arbitrary.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the class folders (README, .DS_Store, ...)
            # would otherwise make os.listdir(label_dir) raise.
            continue

        for filename in sorted(os.listdir(label_dir)):
            if not filename.endswith('.mp4'):
                continue
            file_path = os.path.join(label_dir, filename)

            # Only the first frame is used; always release the capture so
            # file handles / decoder resources are not leaked per video.
            cap = cv2.VideoCapture(file_path)
            try:
                ret, frame = cap.read()
            finally:
                cap.release()

            if not ret:
                print(f"Não foi possível abrir o vídeo {file_path}")
                continue

            # process_frame draws the detected landmarks onto the frame, so
            # the stored image includes that overlay when two hands are found.
            frame, landmarks = process_frame(frame)
            if landmarks is not None:
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, (64, 64))  # network input size
                X.append(img)
                y.append(label)  # label = folder name (word in Libras)
                all_landmarks.append((landmarks, label))

    return np.array(X), np.array(y), all_landmarks

# Number of Libras words (classes) the classifier distinguishes; used as the
# number of units of the model's softmax output layer.
# NOTE(review): presumably this must equal the number of label folders in the
# training directory - confirm whenever the dataset changes.
num_classes = 5

In [32]:
# Define the CNN: one convolution/pooling stage followed by a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))  # one output per Libras word

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load the training data
data_dir = './pasta2'
X_train, y_train, all_landmarks = load_data(data_dir)

# Fail early with a clear message instead of an unrelated error further down
# (e.g. unpacking zip(*all_landmarks) of an empty list).
if len(X_train) == 0:
    raise ValueError(f"Nenhum vídeo .mp4 legível foi encontrado em {data_dir}")

# Pre-processing: add the channel axis and scale pixel values to [0, 1]
X_train = X_train.reshape(-1, 64, 64, 1)
X_train = X_train.astype('float32') / 255.0

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)

# The softmax layer has num_classes outputs, so the labels must describe
# exactly that many classes; otherwise model.fit fails with an obscure
# shape-mismatch error.
classes_found = len(encoder.classes_)
if classes_found != num_classes:
    raise ValueError(
        f"num_classes={num_classes}, mas foram encontradas {classes_found} "
        f"classes em {data_dir}: {list(encoder.classes_)}"
    )

# Convert integer labels to one-hot vectors; the width is given explicitly so
# it always matches the model's output layer instead of being inferred.
y_train_one_hot = to_categorical(y_train_encoded, num_classes=num_classes)

# Train the model on the one-hot labels
model.fit(X_train, y_train_one_hot, epochs=10, batch_size=32)

# Save the trained model, plus the landmarks and label of every sample
model.save('libras_model_mediapipe.h5')
landmarks, classes = zip(*all_landmarks)
np.save('landmarks.npy', np.array(landmarks))
np.save('classes_mediapipe.npy', np.array(classes))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.2353 - loss: 1.6091
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.2941 - loss: 1.3991
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - accuracy: 0.7059 - loss: 1.0090
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 0.7647 - loss: 0.8366
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 0.8824 - loss: 0.6592
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - accuracy: 1.0000 - loss: 0.5091
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 1.0000 - loss: 0.4060
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 1.0000 - loss: 0.3251
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

