# INSTALACIONES

In [None]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib

# IMPORTS 

In [2]:
import cv2
import numpy as np
import os
import mediapipe

# Funcionalidades para MediaPipe

- Import de modelo
- mediapipe_detection: detección de puntos corporales sobre un frame/imagen
- draw_styled_landmarks: dibuja los puntos corporales sobre la imagen/frame recibido
- array_from_landmarks: retorna un arreglo que concatena las coordenadas (x, y) de los puntos de [cara, pose, mano izquierda, mano derecha] obtenidos para 1 frame

In [None]:
# Short aliases for the MediaPipe modules used throughout the notebook:
# the Holistic solution and the landmark drawing utilities.
mp_holistic = mediapipe.solutions.holistic
mp_drawing = mediapipe.solutions.drawing_utils

In [None]:
def mediapipe_detection(image, model):
    """Run ``model`` on one frame and return the frame together with the results.

    Args:
        image: frame to analyse; it is converted with ``cv2.COLOR_BGR2RGB``
            before being handed to the model.
        model: object exposing ``process(image)`` (the notebook passes a
            MediaPipe Holistic instance).

    Returns:
        A ``(image, results)`` tuple: the frame converted back with
        ``cv2.COLOR_RGB2BGR`` and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is flagged read-only only for the duration of the inference
    # call (presumably so the model can use it without copying - TODO confirm).
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

In [5]:
                # [color1 (B), color2 (G), color3 (R), thickness, circleRadius]
# Each list below follows the layout above; draw_styled_landmarks reads them by index.
leftHandStyles =  [0, 138, 255, 2, 1] # orange
rightHandStyles = [231, 217, 0, 2, 1] # light blue
faceStyles =      [80, 110, 10, 0, 1]
poseStyles =      [70, 100, 5, 2, 1]

def draw_styled_landmarks(image, results):
    """Draw the face, pose and hand landmarks found in ``results`` onto ``image``.

    Drawing is done in place through ``mp_drawing.draw_landmarks``; nothing is
    returned. Each body part uses its own module-level style list
    (``faceStyles``, ``poseStyles``, ``leftHandStyles``, ``rightHandStyles``),
    laid out as ``[c1, c2, c3, thickness, circle_radius]``.

    Args:
        image: frame the landmarks are drawn on.
        results: detection output exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmarks, connection set, style list), in drawing order:
    # face, pose/body, left hand, right hand.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, faceStyles),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, poseStyles),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, leftHandStyles),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, rightHandStyles),
    )

    for landmarks, connections, style in layers:
        spec = mp_drawing.DrawingSpec(
            color=(style[0], style[1], style[2]),
            thickness=style[3],
            circle_radius=style[4],
        )
        mp_drawing.draw_landmarks(image, landmarks, connections, spec)

In [None]:
def array_from_landmarks(results):
    """Flatten the (x, y) coordinates of one frame's landmarks into a 1-D array.

    Only ``x`` and ``y`` of every landmark are kept. A body part that is
    missing (falsy) in ``results`` is replaced by zeros, sized for the landmark
    count the code assumes for that part: 468 face, 33 pose and 21 per hand.

    Args:
        results: detection output exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable of points with ``x`` and ``y``.

    Returns:
        ``np.ndarray`` with the parts concatenated in the order
        [face, pose, left hand, right hand].
    """
    def _xy_vector(part, landmark_count):
        # Two values (x, y) per landmark; zeros stand in for an undetected part.
        if part:
            return np.array([[point.x, point.y] for point in part.landmark]).flatten()
        return np.zeros(landmark_count * 2)

    left_hand = _xy_vector(results.left_hand_landmarks, 21)
    right_hand = _xy_vector(results.right_hand_landmarks, 21)
    pose = _xy_vector(results.pose_landmarks, 33)
    face = _xy_vector(results.face_landmarks, 468)

    return np.concatenate((face, pose, left_hand, right_hand))

# TRATADO DE VIDEOS

- Celda que calcula la cantidad máxima y mínima de frames que tienen los videos de cada seña.
- Definición de: Path/Ruta de almacenamiento, Lista de Palabras, Cantidad de Personas, Cantidad de Videos por Persona y número máximo de Frames (hallado con la celda superior).
- Creacion de carpetas para almacenar los resultados de cada video
- Funcion que realiza Padding a derecha a cada video que no alcanza la cantidad de frames "maximumNumberOfFrames", esto hace que todos los arreglos generados tengan la misma longitud.
- Procesado de videos: se procesa cada frame de cada video con MediaPipe y se almacena el resultado de cada video, en un archivo de extensión ".npy", como un arreglo de frames, donde cada frame contiene la concatenación de los puntos corporales que entrega array_from_landmarks().

In [None]:
# Scan the LSA64 clips and record, for each sign, the largest and the smallest
# frame count found among its videos.
# File names follow 0SS_0PP_00R.mp4 with 1-based ids; the loops cover
# 64 values for SS, 10 for PP and 5 for R.
max_frames = np.zeros(64)
min_frames = [400 for i in range(64)]  # 400 is the starting value for the minimum search
for i in range(64):
    for j in range(10):
        for k in range(5):
            video_path = f'''LSA64/all_cut/0{str(i+1).zfill(2)}_0{str(j+1).zfill(2)}_00{k+1}.mp4'''
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    # Skip unreadable files so their reported frame count does
                    # not end up as the minimum for this sign.
                    print(f'''Error opening video stream or file: {video_path}''')
                    continue
                length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            finally:
                # Free the capture handle before opening the next file.
                cap.release()
            if length > max_frames[i]:
                max_frames[i] = length
            if length < min_frames[i]:
                min_frames[i] = length
print(max_frames)
print(min_frames)
# Positions (0-based) of the overall maximum and minimum among the per-sign values.
i = np.argmax(max_frames)
j = np.argmin(min_frames)
print(f'''Maximo numero de frames {max_frames[i]}. Seña: {i}''')
print(f'''Minimo numero de frames {min_frames[j]}. Seña: {j}''')

In [3]:
# Root folder where the per-video keypoint arrays are stored.
STORE_PATH2 = os.path.join('LSA64_data')

# List of words used to build the sentences; not all 64 words are included
signs_list = ['nacer','comida','brillante', 'mujer', 'hijo', 'hombre', 'lejos', 'aprender', 'espumadera','amargo','leche','Uruguay','pais','donde','ninguno','nombre','perfume','sordo','comprar','encontrar', 'nave espacial']
numberOfPersons = 10
numberOfVideosPerPerson = 5

# Length every stored sequence is padded up to (see paddData); per the notebook
# notes this value was obtained from the frame-count scan cell.
maximumNumberOfFrames = 201

In [None]:
# FOLDER CREATION: one output folder per sign under STORE_PATH2.
for sign in signs_list:
    try:
        os.makedirs(os.path.join(STORE_PATH2, sign))
    except FileExistsError as error:
        # The folder already exists (e.g. from a previous run): report the
        # actual exception and carry on with the next sign.
        print(f'''Error: {error}, {sign}''')
    else:
        print(f'''Make dir. {sign} ready''')

In [None]:
# Padding value is 3 because the values of the array are normalized to [0.0, 1.0],
# so 3 cannot be confused with a real coordinate.
paddingValue = 3


def paddData(sequence):
    """Right-pad a sequence of frames up to ``maximumNumberOfFrames`` rows.

    Rows filled with ``paddingValue`` are appended after the last frame; the
    second axis (the per-frame values) is left untouched.

    Args:
        sequence: 2-D sequence of frames with at most
            ``maximumNumberOfFrames`` rows.

    Returns:
        ``np.ndarray`` with ``maximumNumberOfFrames`` rows.
    """
    missing_frames = maximumNumberOfFrames - len(sequence)
    pad_widths = [(0, missing_frames), (0, 0)]  # (before, after) for each axis
    return np.pad(
        sequence,
        pad_widths,
        mode='constant',
        constant_values=paddingValue,
    )

In [None]:
# Run MediaPipe Holistic over every clip and save one keypoint sequence per
# video (np.save adds the ".npy" extension to the given path).
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holisticModel:
    # NOTE(review): the loop starts at index 49; if signs_list has 49 or fewer
    # entries (as the list defined in this notebook does) the range is empty and
    # no video is processed. The input file id (sign+1) and the output folder
    # signs_list[sign] also share the same index - confirm the intended mapping.
    for sign in range(49,len(signs_list)):
        for personNumber in range(numberOfPersons):
            for videoNumber in range(numberOfVideosPerPerson):
                # Input files use 1-based, zero-padded ids: 0SS_0PP_00R.mp4
                caption = cv2.VideoCapture(f'''all_cut/0{str(sign+1).zfill(2)}_0{str(personNumber+1).zfill(2)}_00{videoNumber+1}.mp4''')
                print(f'''Leo: 0{str(sign+1).zfill(2)}_0{str(personNumber+1).zfill(2)}_00{videoNumber+1}.mp4''')
                if (caption.isOpened() == False):
                    print("Error opening video stream or file")
                    caption.release()
                    # NOTE(review): break also skips the remaining videos of
                    # this person - confirm this is intended rather than
                    # moving on to the next video.
                    break
                keypointsSequence = []
                try:
                    while(caption.isOpened()):
                        ret, frame = caption.read()
                        if (ret):
                            image, results = mediapipe_detection(frame, holisticModel)
                            # Uncomment the next two lines to preview the detections while processing:
#                             draw_styled_landmarks(image, results)
#                             cv2.imshow('Frame',image)
                            keypoints = array_from_landmarks(results)
                            keypointsSequence.append(keypoints)
                        else:
                            # No more frames to read.
                            break
                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            # Break the loop
                            break
                finally:
                    # Release every capture as soon as its frames are read,
                    # instead of only the last one after all the loops.
                    caption.release()
                # Sequences shorter than maximumNumberOfFrames are right-padded;
                # longer ones are stored as they are.
                a = keypointsSequence
                if len(keypointsSequence) < maximumNumberOfFrames:
                    a = paddData(keypointsSequence)
                # Output names use the 0-based person and video indices.
                localPath = os.path.join(STORE_PATH2, signs_list[sign], f'''0{str(personNumber)}_0{str(videoNumber)}''')
                np.save(localPath, a)
                print(f'''Guardo en: {STORE_PATH2}/{signs_list[sign]}/0{str(personNumber)}_0{str(videoNumber)}''')
# Closes all the frames
cv2.destroyAllWindows()