In [None]:
import pickle
import cv2
import mediapipe as mp
import pandas as pd
import os
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"

import matplotlib.pyplot as plt
import subprocess
import glob, os
import numpy as np
# Working directory at the time this cell runs, captured once for later path building.
initial_dir = os.getcwd()

# Placeholder; a later cell rebinds `df` to a frame built from `frames`.
df = pd.DataFrame()
# Accumulator of per-frame landmark rows (one dict per frame); extract_video() appends to it.
frames = []
# NOTE(review): not referenced anywhere else in the visible notebook — confirm whether it is still needed.
ids_without_required_hands = []

In [None]:
max_seq_length = 30 # Rows (frames) per video sequence; padding_videos() zero-pads shorter sequences to this length
num_samples = 1 # Number of videos processed by this notebook run
num_features = 84 # 21 landmarks x (x, y) x 2 hands (Left, Right) = 84 columns per frame

In [None]:
def flat_X(X, rows_to_flatten=None):
    """Flatten consecutive groups of rows into one feature vector per sequence.

    Every ``rows_to_flatten`` consecutive rows of ``X`` are concatenated
    (row-major) into a single output row, so an input with
    ``k * rows_to_flatten`` rows and ``f`` columns becomes an array of shape
    ``(k, rows_to_flatten * f)``. Trailing rows that do not fill a complete
    group are dropped.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per frame; anything exposing ``to_numpy()`` works.
    rows_to_flatten : int, optional
        Rows per sequence. Defaults to the notebook-level ``max_seq_length``.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per complete sequence. When there are fewer
        than ``rows_to_flatten`` input rows the result has zero rows (and the
        usual width) instead of raising.

    Raises
    ------
    ValueError
        If ``rows_to_flatten`` is not positive.
    """
    if rows_to_flatten is None:
        rows_to_flatten = max_seq_length
    if rows_to_flatten <= 0:
        raise ValueError("rows_to_flatten must be a positive integer")

    data_array = X.to_numpy()
    resulting_rows = data_array.shape[0] // rows_to_flatten
    features_per_row = data_array.shape[1] if data_array.ndim > 1 else 1

    # Keep only complete sequences, then collapse each one in a single reshape.
    # The width is spelled out (rather than -1) so an empty result is still valid.
    usable = data_array[:resulting_rows * rows_to_flatten]
    return usable.reshape(resulting_rows, rows_to_flatten * features_per_row)

In [None]:
# Short aliases for the MediaPipe Tasks classes used to configure the hand landmarker.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode


# Landmarker configuration: the model bundle is read from 'hand_landmarker.task'
# (a relative path, so it is resolved against the current working directory),
# the task runs in IMAGE mode and num_hands is set to 2.
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.IMAGE,
    num_hands=2,
)
# Shared detector instance; extract_video() calls hands.detect() on each frame.
hands = HandLandmarker.create_from_options(options)

In [None]:
def normalize_coordinates(coordinates, target, image_height):
    """Flip points to a bottom-left origin and min-max scale them to [0, 1].

    The y axis is inverted (``y -> image_height - y``) so the points are
    oriented the way matplotlib plots them, then each axis is rescaled
    independently so the bounding box of the points becomes the unit square.

    Parameters
    ----------
    coordinates : sequence of (x, y)
        Pixel coordinates in image convention (origin at the top-left).
    target : str
        Label of the sample. Not used by the computation; kept so the
        signature matches ``visualize_data`` for ad-hoc debugging.
    image_height : number
        Height of the source image in pixels, used for the y flip.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)`` with values in ``[0, 1]``. An axis on which
        all points coincide (zero extent) is mapped to 0 instead of NaN, and
        an empty input returns an empty ``(0, 2)`` array.
    """
    points = np.array(coordinates, dtype=float).reshape(-1, 2)
    if points.shape[0] == 0:
        return points

    # Image rows grow downwards; flip so larger y means "higher up".
    points[:, 1] = image_height - points[:, 1]

    mins = points.min(axis=0)
    spans = points.max(axis=0) - mins
    # A zero span would divide 0 by 0; dividing by 1 leaves that axis at 0.
    safe_spans = np.where(spans == 0, 1.0, spans)

    return (points - mins) / safe_spans

def visualize_data(normalized_coordinates, target):
    """Scatter-plot a set of normalized (x, y) points for visual inspection.

    Parameters
    ----------
    normalized_coordinates : iterable of (x, y)
        Points to plot, e.g. the output of ``normalize_coordinates``.
    target : str
        Label shown in the plot legend.
    """
    # Split the (x, y) pairs into one sequence per axis.
    normalized_x, normalized_y = zip(*normalized_coordinates)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(normalized_x, normalized_y, color='blue', label=f'{target}')
    ax.set_xlabel('Normalized X')
    ax.set_ylabel('Normalized Y')
    ax.set_title('Normalized Coordinates Plot')
    ax.legend()
    ax.grid(True)
    plt.show()

In [None]:
def _hand_features(hand_landmarker_result, frame_width, frame_height, target):
    """Build the landmark feature columns of one frame from a detection result."""
    hand_sides = ["Left", "Right"]
    landmarks_per_hand = 21
    features = {}

    # NOTE(review): the column side is assigned by detection order (first hand ->
    # "Left", second -> "Right"), not from the handedness label in the result.
    # This is kept as-is because the pickled models were presumably trained on
    # features laid out this way — confirm before changing it.
    for hand_side, landmarks in zip(hand_sides, hand_landmarker_result.hand_landmarks):
        # Landmark coordinates are multiplied by the frame size to get pixels.
        detected_pixels = [
            [landmark.x * frame_width, landmark.y * frame_height]
            for landmark in landmarks
        ]
        normalized = normalize_coordinates(detected_pixels, target, frame_height)
        for i, (x, y) in enumerate(normalized):
            features[f'x_{hand_side}_hand_{i}'] = x
            features[f'y_{hand_side}_hand_{i}'] = y

    # With a single detected hand, zero-fill the second hand's columns so every
    # row has the same set of feature columns.
    if len(hand_landmarker_result.handedness) == 1:
        for i in range(landmarks_per_hand):
            features[f'x_{hand_sides[1]}_hand_{i}'] = 0
            features[f'y_{hand_sides[1]}_hand_{i}'] = 0

    return features


def extract_video(video, target, sequence_id, real_path):
    """Extract normalized hand landmarks from a video into the global ``frames`` list.

    Frames are read in order; each frame in which at least one hand is
    detected contributes one row (a dict) to the module-level ``frames`` list.
    Reading stops at the end of the video or once ``max_seq_length`` rows have
    been collected.

    Parameters
    ----------
    video : str
        Path of the video file to read.
    target : str
        Label stored in the ``target`` column of every row.
    sequence_id : int
        Identifier stored in the ``sequence_id`` column of every row.
    real_path : str
        Original file path, stored in the ``file`` column and used in messages.

    Returns
    -------
    None
        Rows are appended to ``frames`` as a side effect. A message is printed
        when the video cannot be opened or no hand is found in any frame.
    """
    added_rows = 0
    cap = cv2.VideoCapture(video)
    opened = cap.isOpened()

    try:
        while added_rows < max_seq_length and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_height, frame_width = frame.shape[:2]

            # OpenCV decodes to BGR; wrap an RGB copy in memory instead of
            # round-tripping every frame through a PNG file on disk.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            hand_landmarker_result = hands.detect(mp_image)

            if len(hand_landmarker_result.handedness) == 0:
                continue

            row_data = {
                "sequence_id": sequence_id,
                "target": target,
                "file": real_path
            }
            row_data.update(
                _hand_features(hand_landmarker_result, frame_width, frame_height, target)
            )
            frames.append(row_data)
            added_rows += 1
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    if not opened:
        print("!! Could not open video ", real_path)
    elif added_rows == 0:
        print("!! No hand detected in ", real_path)

    cv2.destroyAllWindows()

In [None]:
def get_length(filename):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Parameters
    ----------
    filename : str
        Path of the media file to probe.

    Returns
    -------
    float
        Container duration in seconds.

    Raises
    ------
    ValueError
        If ffprobe exits with an error, prints nothing, or prints something
        that is not a number.
    FileNotFoundError
        If the ``ffprobe`` executable is not on the PATH.
    """
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", filename],
        stdout=subprocess.PIPE,
        # Keep diagnostics out of stdout so they are never parsed as the duration.
        stderr=subprocess.PIPE,
    )
    output = result.stdout.decode(errors="replace").strip()
    if result.returncode != 0 or not output:
        details = result.stderr.decode(errors="replace").strip()
        raise ValueError(f"ffprobe could not read the duration of {filename!r}: {details}")
    return float(output)

In [None]:
def change_to_30_fps(video_input_path, video_output_path):
    """Re-time a video to one second at 30 fps using ffmpeg.

    The input timestamps are scaled by ``1 / duration`` (``-itsscale``) and
    the result is resampled with the ``fps=30`` filter. An existing output
    file is overwritten (``-y``).

    Parameters
    ----------
    video_input_path : str
        Path of the source video.
    video_output_path : str
        Path the converted video is written to.

    Raises
    ------
    ValueError
        If the reported duration of the input is not positive.
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status, so a stale output file from a
        previous run is never silently reused.
    """
    length = get_length(video_input_path)
    if length <= 0:
        raise ValueError(f"Cannot rescale {video_input_path!r}: reported duration is {length}")
    time_scale = 1 / length

    # Pass an argument list (no shell) so paths containing quotes, spaces or
    # shell metacharacters reach ffmpeg unchanged.
    command = [
        "ffmpeg", "-loglevel", "0", "-y",
        "-itsscale", str(time_scale),
        "-i", video_input_path,
        "-filter:v", "fps=fps=30",
        video_output_path,
    ]
    subprocess.run(command, check=True)

In [None]:
# Running counter; every processed video gets the next id.
sequence_id = 0

def extract_coordinates_from_path(path):
    """Convert one video to 30 fps and extract its hand landmarks.

    The label is taken from the file name: everything before the first "("
    (stripped) when the name contains one, otherwise everything before the
    first ".". For example both "Hello.mp4" and "Hello (2).mp4" give "Hello".

    Parameters
    ----------
    path : str
        Video path, absolute or relative to the current working directory.

    Returns
    -------
    None
        Increments the global ``sequence_id`` and, through ``extract_video``,
        appends rows to the global ``frames`` list. The re-timed video is
        written to ``./adjusted_fps_video.mp4``.
    """
    global sequence_id

    output_fps_path = './adjusted_fps_video.mp4'
    # Relative paths keep the './' prefix; absolute paths must be used unchanged.
    input_path = path if os.path.isabs(path) else f'./{path}'
    sequence_id += 1

    # Derive the label from the file name only, so directory components
    # (including a leading './') cannot leak into or empty the label.
    file_name = os.path.basename(path)
    if "(" not in file_name:
        name = file_name.split(".")[0]
    else:
        name = file_name[0:file_name.index("(")].strip()

    change_to_30_fps(video_input_path=input_path, video_output_path=output_fps_path)
    extract_video(output_fps_path, name, sequence_id, input_path)

In [None]:
def padding_videos(df, seq_length=None):
    """Bring every sequence to exactly ``seq_length`` rows by zero-padding.

    Rows are grouped by ``sequence_id``. Groups shorter than ``seq_length``
    get zero-filled rows appended (``sequence_id`` and ``target`` are carried
    over on the padding rows); longer groups are truncated to their first
    ``seq_length`` rows so that every sequence has the same length.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per frame, with at least ``sequence_id`` and ``target`` columns.
    seq_length : int, optional
        Rows per sequence. Defaults to the notebook-level ``max_seq_length``.

    Returns
    -------
    filled_df : pandas.DataFrame
        All sequences concatenated, index reset, remaining NaN replaced by 0.
    target : list
        The ``target`` value of each sequence, in group order.
    """
    if seq_length is None:
        seq_length = max_seq_length

    padded_groups = []
    target = []

    for seq_id, group in df.groupby('sequence_id'):
        label = group['target'].iloc[0]
        group = group.iloc[:seq_length]

        remaining_rows = seq_length - len(group)
        # Only build padding when it is needed: concatenating an empty,
        # object-typed frame is deprecated in pandas and can affect dtypes.
        if remaining_rows > 0:
            zeros_df = pd.DataFrame(0, index=range(remaining_rows), columns=group.columns)
            zeros_df['sequence_id'] = seq_id
            zeros_df['target'] = label
            group = pd.concat([group, zeros_df])

        padded_groups.append(group)
        target.append(label)

    if not padded_groups:
        return pd.DataFrame(), target

    # Concatenate once at the end instead of growing a frame inside the loop.
    filled_df = pd.concat(padded_groups).reset_index(drop=True).fillna(0)
    return filled_df, target

In [None]:
def load_model(model_name):
    """Load a pickled object from ``<model_name>.pkl``.

    Parameters
    ----------
    model_name : str
        File name without the ``.pkl`` extension. A name that already ends in
        ``.pkl`` is also accepted: if ``<model_name>.pkl`` does not exist, the
        name is used as the path as-is.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.
    """
    model_name = str(model_name)
    path = f'{model_name}.pkl'
    # Avoid looking for "name.pkl.pkl" when the caller already passed the extension.
    if model_name.endswith('.pkl') and not os.path.exists(path):
        path = model_name

    # SECURITY: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as f:
        return pickle.load(f)
    
# Pickled models used for prediction below; each name is passed to load_model(),
# which opens '<name>.pkl' relative to the current working directory.
svm_model = load_model("svm")
cnn_model = load_model("cnn")
tree_model = load_model("tree")

In [None]:
# Video to classify, relative to the notebook's working directory.
path_to_extract = "Hello.mp4"

# Start from an empty accumulator so re-running this cell does not stack a
# second copy of the video's rows onto `frames`; the later cells reshape the
# data assuming exactly `num_samples` sequences.
frames.clear()
extract_coordinates_from_path(path_to_extract)

In [None]:
# Without any extracted rows the frame would have no columns and the next line
# would fail with an opaque KeyError, so stop here with a clear message instead.
if not frames:
    raise ValueError("No hand landmarks were extracted; cannot build the feature table.")

# One row per frame with a detected hand, as collected by extract_video().
df = pd.DataFrame(frames)
df['sequence_id'] = df['sequence_id'].astype(int)

In [None]:
# Pad every sequence to a fixed length, then strip the bookkeeping columns so
# that only the landmark coordinate columns remain as model input.
padded_data, target = padding_videos(df)
padded_data = padded_data.drop(columns=["sequence_id", "target", "file"])

In [None]:
# Load the label encoder used to turn predicted class indices back into labels.
# NOTE(review): the name passed here already ends in '.pkl' and load_model()
# adds that extension itself — confirm which file on disk this is meant to open.
label_encoder = load_model('label_encoder.pkl')

In [None]:
# Flat layout (one row per sequence) for the models that take 2-D input.
X_val = flat_X(padded_data)
# 3-D layout (samples, frames, features) for the CNN.
X_val_cnn = padded_data.to_numpy().reshape((num_samples, max_seq_length, num_features))

In [None]:
# Run all three models on the same video: the CNN gets the 3-D array, the tree
# and SVM get the flattened one.
# The CNN output is reduced with argmax over axis 1 in the next cell, so it is
# presumably one score per class for each sample — TODO confirm.
predicted_cnn = cnn_model.predict(X_val_cnn)
predicted_tree = tree_model.predict(X_val)
predicted_svm = svm_model.predict(X_val)

In [None]:
# Pick the highest-scoring column index for each sample and map it back to a
# label with the label encoder.
# NOTE: this rebinds `predicted_cnn` from raw model output to decoded labels, so
# this cell must not be re-run without first re-running the prediction cell.
most_likely_predictions = np.argmax(predicted_cnn, axis=1)
predicted_cnn = label_encoder.inverse_transform(most_likely_predictions)

In [None]:
# Report the prediction of each model for the processed video.
print("----Resultados----")
print(f'CNN: {predicted_cnn}')
print(f'TREE: {predicted_tree}')
print(f'SVM: {predicted_svm}')