In [1]:
import cv2
import mediapipe as mp
import pandas as pd
import os
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"

import matplotlib.pyplot as plt
import subprocess
import glob, os
import numpy as np
initial_dir = os.getcwd()

df = pd.DataFrame()
frames = []
ids_without_required_hands = []

In [2]:
last_sequence_id = 0
path_past_data = f'{initial_dir}/data/data.csv'
if (os.path.exists(path_past_data)):
    past_df = pd.read_csv(path_past_data)
    last_sequence_id = past_df["sequence_id"].max()

print(last_sequence_id)

254


In [3]:
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode


options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.IMAGE,
    num_hands=2,
)
hands = HandLandmarker.create_from_options(options)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
def normalize_coordinates(coordinates, target, image_height):
    matplot_coordinates = []
    normalized_coordinates = []
    for x, y in coordinates:
        y = image_height - y
        matplot_coordinates.append([x, y])

    # Find the minimum and maximum values among the coordinates
    min_x, min_y = np.min(matplot_coordinates, axis=0)
    max_x, max_y = np.max(matplot_coordinates, axis=0)

    # Normalize the coordinates
    normalized_coordinates = (matplot_coordinates - np.array([min_x, min_y])) / np.array([max_x - min_x, max_y - min_y])
    
    # Convert coordinates for plotting
    # visualize_data(normalized_coordinates, target)
    return normalized_coordinates

def visualize_data(normalized_coordinates, target):
    # Unzip normalized coordinates for plotting
    normalized_x, normalized_y = zip(*normalized_coordinates)
    
    # Plot the normalized coordinates
    plt.figure(figsize=(8, 6))
    plt.scatter(normalized_x, normalized_y, color='blue', label=f'{target}')
    plt.xlabel('Normalized X')
    plt.xlabel('Normalized X')
    plt.ylabel('Normalized Y')
    plt.title('Normalized Coordinates Plot')
    plt.legend()
    plt.grid(True)
    plt.show()

In [5]:
two_hands_words = ["familia", "por favor", "ayuda", "amor", "casa", "escuela", "salud", "feliz"]
def extract_video(video, target, sequence_id, real_path):
    global frames, ids_without_required_hands, two_hands_words
    added_rows = 0
    detected_two_hands = False
    # For webcam input:
    cap = cv2.VideoCapture(video)
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        

        name = f'{initial_dir}/test_frame.png'
        cv2.imwrite(name, frame)
        mp_image = mp.Image.create_from_file(name)
        hand_landmarker_result = hands.detect(mp_image)

         
        if len(hand_landmarker_result.handedness) > 0:
            row_data = {
                "sequence_id": sequence_id,
                "target": target,
                "file": real_path
            }
            hand_sides = ["Left", "Right"]
            for idx, landmarks in enumerate(hand_landmarker_result.hand_landmarks):
                detected_pixels = []
                hand_side = hand_sides[idx]
                # Iterate through detected hand landmarks
                for landmark_idx, landmark in enumerate(landmarks):
                    x, y = landmark.x, landmark.y
                    detected_pixels.append([x  * frame.shape[1], y * frame.shape[0]])
                    # Draw circles on the frame
                    cv2.circle(frame, (int(x * frame.shape[1]), int(y * frame.shape[0])), 5, (0, 255, 0), -1)

                    
                detected_pixels = normalize_coordinates(detected_pixels, target, frame.shape[0])
                for i in range(len(detected_pixels)):
                    x, y = detected_pixels[i]
                    row_data[f'x_{hand_side}_hand_{i}'] =  x
                    row_data[f'y_{hand_side}_hand_{i}'] =  y
                
            if (len(hand_landmarker_result.handedness) == 1):
                for i in range(21):
                    x, y = [0, 0]
                    row_data[f'x_{hand_sides[1]}_hand_{i}'] =  x
                    row_data[f'y_{hand_sides[1]}_hand_{i}'] =  y
            

            if (len(hand_landmarker_result.handedness) == 2):
                detected_two_hands = True

            added_rows += 1
            frames.append(row_data)
        # cv2.imshow('Hand Tracking', frame)
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break
        if (added_rows == 30):
            break

    if (added_rows == 0):
        print("!! No hand detected in ", real_path)

    if (not detected_two_hands and target in two_hands_words):
        print(">> No se detectaron las dos manos necesarias en ", real_path, " con id ", sequence_id)
        ids_without_required_hands.append(sequence_id)


    cap.release()
    cv2.destroyAllWindows()

In [6]:
def get_length(filename):
    result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                             "format=duration", "-of",
                             "default=noprint_wrappers=1:nokey=1", filename],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    return float(result.stdout)

In [7]:
def change_to_30_fps(video_input_path, video_output_path):
    duration = 1/get_length(video_input_path)

    c = f'ffmpeg -loglevel 0 -y -itsscale {duration} -i "' + video_input_path + f'" -filter:v fps=fps=30 "' + video_output_path + '"'
    subprocess.call(c, shell=True)

In [8]:
sequence_id = last_sequence_id
validation_sequence_ids = []

def extract_coordinates_from_dir(dir, is_val=False):
    global sequence_id, initial_dir, validation_sequence_ids
    os.chdir(dir)

    output_fps_path = f'{dir}/adjusted_fps_video.mp4'

    files_to_extract = glob.glob("*.mp4")
    print(f'Hay {len(files_to_extract)} videos a extraer coordenadas')
    for file in files_to_extract:
        if ("adjusted_fps_video.mp4" in file):
            return
            
        input_path = f'{dir}/{file}'
        sequence_id += 1
        if (is_val):
            validation_sequence_ids.append(sequence_id)

        if ("(" not in file):
            name = file.split(".")[0]
        else:
            name = file[0:file.index("(")].strip()


        change_to_30_fps(video_input_path=input_path, video_output_path=output_fps_path)
        extract_video(output_fps_path, name, sequence_id, input_path)

    os.chdir(initial_dir)

In [9]:
training_dir = f'{initial_dir}/data/videos_lensegua_2'
extract_coordinates_from_dir(training_dir)

Hay 363 videos a extraer coordenadas
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/casa (12).mp4  con id  287
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/amor (5).mp4  con id  292
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/casa (10) copy.mp4  con id  296
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/amor (2) copy.mp4  con id  299
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/escuela (17).mp4  con id  317
!! No hand detected in  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/comida (16).mp4
>> No se detectaron las dos manos necesarias en  /Users/andreaamaya/Trabajo/megaproyecto_model/data/videos_lensegua_2/

In [10]:
validation_dir = f'{initial_dir}/data/videos_val_2'
extract_coordinates_from_dir(validation_dir, is_val=True)

Hay 27 videos a extraer coordenadas


In [11]:
df = pd.DataFrame(frames)

In [12]:
df['sequence_id'] = df['sequence_id'].astype(int)

In [13]:
df.head()

Unnamed: 0,sequence_id,target,file,x_Left_hand_0,y_Left_hand_0,x_Left_hand_1,y_Left_hand_1,x_Left_hand_2,y_Left_hand_2,x_Left_hand_3,...,x_Right_hand_16,y_Right_hand_16,x_Right_hand_17,y_Right_hand_17,x_Right_hand_18,y_Right_hand_18,x_Right_hand_19,y_Right_hand_19,x_Right_hand_20,y_Right_hand_20
0,255,feliz,/Users/andreaamaya/Trabajo/megaproyecto_model/...,0.062766,0.031105,0.0,0.267209,0.088161,0.586451,0.130851,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,255,feliz,/Users/andreaamaya/Trabajo/megaproyecto_model/...,0.008392,0.022065,0.0,0.262948,0.130833,0.57411,0.20049,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,255,feliz,/Users/andreaamaya/Trabajo/megaproyecto_model/...,0.0,0.0,0.024851,0.289231,0.179532,0.592871,0.271284,...,0.512381,0.129533,0.594147,0.03922,0.378976,0.032075,0.463215,0.00542,0.554625,0.0
3,255,feliz,/Users/andreaamaya/Trabajo/megaproyecto_model/...,0.0,0.02879,0.057657,0.330858,0.214202,0.600497,0.322143,...,0.586396,0.139624,0.607326,0.12332,0.470047,0.014264,0.551923,0.0,0.629357,0.023692
4,255,feliz,/Users/andreaamaya/Trabajo/megaproyecto_model/...,0.0,0.037505,0.084723,0.350202,0.245635,0.611125,0.341049,...,0.574911,0.135054,0.61105,0.149616,0.489662,0.010995,0.569037,0.0,0.636393,0.038298


In [14]:
len(df.columns)

87

In [15]:
if (len(validation_sequence_ids) > 0):
    validation_path = f'{initial_dir}/data/validation.csv'
    validation_df = df[df['sequence_id'].isin(validation_sequence_ids)][["sequence_id", "target", "file"]]
    
    train_path = f'{initial_dir}/data/train.csv'
    train_df = df[~df['sequence_id'].isin(validation_sequence_ids)][["sequence_id", "target", "file"]]
    
    if (last_sequence_id == 0):
        validation_df.to_csv(validation_path, index=False)
        train_df.to_csv(train_path, index=False)
    else:
        df_past_val = pd.read_csv(validation_path)
        df_past_train = pd.read_csv(train_path)

        df_val_final = pd.concat([df_past_val, validation_df])
        df_train_final = pd.concat([df_past_train, train_df])

        df_val_final.to_csv(validation_path, index=False)
        df_train_final.to_csv(train_path, index=False)

In [16]:
data_path = f'{initial_dir}/data/data.csv'
df_final = df.drop("file", axis=1)

if (last_sequence_id == 0):
    df_final.to_csv(data_path, index=False)
else:
    df_past_data = pd.read_csv(data_path)
    df_data_final = pd.concat([df_past_data, df_final])
    df_data_final.to_csv(data_path, index=False)

In [17]:
frames = []
extra_val_dir = f'{initial_dir}/data/test videos'
extract_coordinates_from_dir(extra_val_dir)

df = pd.DataFrame(frames) 
df['sequence_id'] = df['sequence_id'].astype(int)

Hay 10 videos a extraer coordenadas


In [18]:
data_path = f'{initial_dir}/data/data_tests.csv'
df.to_csv(data_path, index=False)