### Llibreries

In [1]:
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
import numpy as np

### Funcions

Inicialitzem i guardem els mòduls de MediaPipe necessaris per fer el face mesh.
La llista 'index_lips' conté els índexs de tots els punts que defineixen els llavis.

In [2]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_face_detection = mp.solutions.face_detection
# Face-mesh landmark indices of the points that define the lips (80 entries).
# The order matters: later cells address the collected points by position.
index_lips = [61, 76, 62, 78, 
              185, 184, 183, 191, 95, 96, 77, 146, 
              40, 74, 42, 80, 88, 89, 90, 91, 
              39, 73, 41, 81, 178, 179, 180, 181, 
              37, 72, 38, 82, 87, 86, 85, 84,
              0, 11, 12, 13, 14, 15, 16, 17,
              267, 302, 268, 312, 317, 316, 315, 314, 
              269, 303, 271, 311, 402, 403, 404, 405, 
              270, 304, 272, 310, 318, 319, 320, 321, 
              409, 408, 407, 415, 324, 325, 307, 375, 
              308, 292, 306, 291]

### Visualitzar tots els punts i els contorns de 'face_mesh'

In [3]:
# Run FaceMesh on one still image, draw the face contours on it, save the
# annotated image to disk and show a resized preview in an OpenCV window.
image_path = 'data/img/A_3/scene00126.jpg'
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than at `image.shape`.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
height, width, _ = image.shape
# The image is converted from BGR to RGB before being handed to FaceMesh.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Keep the model open only for the inference itself, not while the preview
# window is blocking on a key press.
with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    min_detection_confidence=0.5) as face_mesh:
    results = face_mesh.process(image_rgb)

# multi_face_landmarks is None when no face was found; nothing is drawn then.
if results.multi_face_landmarks:
    landmark_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=6, circle_radius=1)
    connection_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=6)
    for face_landmarks in results.multi_face_landmarks:
        mp_drawing.draw_landmarks(image, face_landmarks,
                                  mp_face_mesh.FACEMESH_CONTOURS,
                                  landmark_spec,
                                  connection_spec)

# The full-resolution annotated image is saved; only the preview is resized.
image_resize = cv2.resize(image, (544, 960))
cv2.imwrite('data/face_mesh.jpg', image)
cv2.imshow("Image", image_resize)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Visualitzar tots els punts de 'index_lips'

In [4]:
# Collect the coordinates of every lip landmark, mark each one on the image,
# and append the centroid of the collected points as the last entry of each list.
lip_info = []    # one [x, y, z] per lip landmark, as returned by FaceMesh
lip_info_2 = []  # one [x, y] per lip landmark
mp_drawing_styles = mp.solutions.drawing_styles

image_path = 'data/img/A_3/scene00126.jpg'
image = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
height, width, _ = image.shape
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    min_detection_confidence=0.5) as face_mesh:
    results = face_mesh.process(image_rgb)

# Without landmarks the centroid would be the mean of an empty list (NaN), and
# the positional lookups in later cells would break, so stop with a clear error.
if not results.multi_face_landmarks:
    raise RuntimeError(f"No face landmarks detected in {image_path}")

for face_landmarks in results.multi_face_landmarks:
    for index in index_lips:
        landmark = face_landmarks.landmark[index]
        lip_info.append([landmark.x, landmark.y, landmark.z])
        lip_info_2.append([landmark.x, landmark.y])
        # x / y are fractions of the image size; scale them to pixel coordinates.
        x = int(landmark.x * width)
        y = int(landmark.y * height)
        cv2.circle(image, (x, y), 2, (255, 0, 255), 12)

# Per-axis mean of the collected points, appended as the final element.
# zip(*points) regroups the [x, y, z] rows into one sequence per axis.
lip_info.append([np.mean(axis) for axis in zip(*lip_info)])
lip_info_2.append([np.mean(axis) for axis in zip(*lip_info_2)])

image_resize = cv2.resize(image, (544, 960))
cv2.imshow("Image", image_resize)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Visualitzar el requadre que conté el rostre amb 'FaceDetection'

Aquesta part pertany a les proves que van ser necessàries per fer el preprocessament del model que prediu paraules en anglès.

In [5]:
# Detect the face bounding box with FaceDetection and draw it, then collect the
# lip landmarks with FaceMesh, mark them on the image and append their centroid
# as the last entry of each list.
lip_info = []    # one [x, y, z] per lip landmark, as returned by FaceMesh
lip_info_2 = []  # one [x, y] per lip landmark

# Initialise the model objects. They are intentionally left open as notebook
# globals, exactly as before, in case later cells reuse them.
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Load the image
image_path = "data/img/A_3/scene00126.jpg"
image = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
ih, iw, _ = image.shape

# Convert the image to RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Run face detection
results_detection = face_detection.process(image_rgb)

# The following cells need xmin/xmax/ymin/ymax, so a missing detection is an
# error here instead of a silent no-op followed by a NameError further down.
if not results_detection.detections:
    raise RuntimeError(f"No face detected in {image_path}")

for detection in results_detection.detections:
    bboxC = detection.location_data.relative_bounding_box
    # The relative box is scaled by the image size to get pixel coordinates.
    xmin = int(bboxC.xmin * iw)
    ymin = int(bboxC.ymin * ih)
    w = int(bboxC.width * iw)
    h = int(bboxC.height * ih)
    xmax = xmin + w
    ymax = ymin + h
    bbox = (xmin, ymin, w, h)
    # Draw the face bounding box
    cv2.rectangle(image, bbox, (0, 255, 0), 3)

# Run FaceMesh once on the whole image. Doing this inside the detection loop
# re-appended the same landmarks for every detected box, which shifted the
# centroid away from its expected position at the end of the 80 lip points.
results_mesh = face_mesh.process(image_rgb)

# Without landmarks the centroid would be the mean of an empty list (NaN).
if not results_mesh.multi_face_landmarks:
    raise RuntimeError(f"No face landmarks detected in {image_path}")

for face_landmarks in results_mesh.multi_face_landmarks:
    for index in index_lips:
        landmark = face_landmarks.landmark[index]
        lip_info.append([landmark.x, landmark.y, landmark.z])
        lip_info_2.append([landmark.x, landmark.y])
        x = int(landmark.x * iw)
        y = int(landmark.y * ih)
        cv2.circle(image, (x, y), 2, (255, 0, 255), 12)

# Per-axis mean of the collected points, appended as the final element.
# zip(*points) regroups the [x, y, z] rows into one sequence per axis.
lip_info.append([np.mean(axis) for axis in zip(*lip_info)])
lip_info_2.append([np.mean(axis) for axis in zip(*lip_info_2)])

image_resize = cv2.resize(image, (544, 960))
cv2.imshow("Image", image_resize)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [6]:
# Reference point for projecting the lip points: the centre of the face
# bounding box, in pixels (integer division keeps it on the pixel grid).
face_center_x, face_center_y = (xmin + xmax) // 2, (ymin + ymax) // 2
# The face is assumed to lie in the Z = 0 plane.
face_center_z = 0

In [7]:
# Express every entry of lip_info relative to the face-box centre.
# NOTE(review): x and y are scaled to pixels (by iw / ih) while z is used
# unscaled, so the three components are not in the same units — confirm this
# is intended.
projected_lip_points = [
    [(px * iw) - face_center_x, (py * ih) - face_center_y, pz - face_center_z]
    for px, py, pz in lip_info
]

In [8]:
# Calcular las distancias euclidianas
distances = []
for projected_point in projected_lip_points:
    distance = np.linalg.norm(projected_point)
    distances.append(distance)

In [9]:
# Display the distance of every projected lip point to the face centre.
distances

[211.98847000809292,
 206.87840504223834,
 200.3695157103275,
 194.93225206259385,
 179.03045161722855,
 177.25566484537478,
 173.85082096424904,
 164.6996701635703,
 216.9487249276778,
 222.86868843481324,
 229.81799713925824,
 237.1042667090333,
 148.60721864405826,
 149.6977432692294,
 151.81515609987997,
 146.75520018462421,
 229.03774751145912,
 237.56382517036482,
 251.5699266171585,
 264.99002421215016,
 113.23458375933961,
 124.19355563513217,
 133.04970904184512,
 133.03683931785622,
 240.71067701373133,
 254.48469379804925,
 274.1513511212464,
 295.0180915204672,
 79.91323477662009,
 103.36105935828152,
 121.00843053434699,
 126.82025862690543,
 250.4951474864787,
 268.9000406392005,
 293.81655619566965,
 316.32938648317594,
 93.01034311529592,
 111.28073089323615,
 125.75001837573794,
 131.9715054303722,
 258.4013491953956,
 279.82493491590384,
 303.49314307812006,
 327.1930237871127,
 117.62320513947834,
 132.56325637442004,
 143.39510167597143,
 145.13479890003106,
 262.10

In [10]:
# Entry 80 is the centroid that was appended after the 80 lip landmarks
# (holds when the landmarks of a single face were collected).
lip_info[80]

[0.5362813234329223, 0.6204870626330375, -0.03463999899668124]

In [11]:
# First collected lip point as [x, y] (the landmark at index_lips[0]).
lip_info_2[0]

[0.384128600358963, 0.6135057210922241]

In [12]:
# NumPy copies of the 3-D and 2-D point lists, so rows can be subtracted directly.
vocales_info_np, vocales_info_np_2 = (
    np.array(points) for points in (lip_info, lip_info_2)
)

In [13]:
# Number of rows: the collected lip points plus the appended centroid.
len(vocales_info_np)

81

In [14]:
# Distance between the first lip point and the entry at index 80.
first_to_center = vocales_info_np[0] - vocales_info_np[80]
distancia = np.linalg.norm(first_to_center)
distancia

0.1580784873779008

In [15]:
# Row 80 of the 3-D array: the same entry as lip_info[80], now as a NumPy array.
vocales_info_np[80]

array([ 0.53628132,  0.62048706, -0.03464   ])

In [16]:
# Distance from every 3-D point to the reference row (index 80).
coordenada_central = 80
centro_3d = vocales_info_np[coordenada_central]
img_lip_info_1 = []
for fila in vocales_info_np:
    distancia = np.linalg.norm(fila - centro_3d)
    img_lip_info_1.append(distancia)

In [17]:
# Distance from every 2-D point to the reference row (index 80).
coordenada_central = 80
centro_2d = vocales_info_np_2[coordenada_central]
img_lip_info_2 = []
for fila in vocales_info_np_2:
    distancia = np.linalg.norm(fila - centro_2d)
    img_lip_info_2.append(distancia)