## Imports

In [465]:
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import mediapipe as mp
import matplotlib.pyplot as plt

from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
from mediapipe.framework.formats import landmark_pb2

from mediapipe.python.solutions.pose import PoseLandmark
from mediapipe.python.solutions.drawing_utils import DrawingSpec

In [466]:
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

## Process Own Videos

In [518]:
custom_style = mp_drawing_styles.get_default_pose_landmarks_style()
custom_connections = list(mp_pose.POSE_CONNECTIONS)

In [519]:
# custom_style

In [522]:
# custom_connections

In [523]:
excluded_landmarks = [
    PoseLandmark.LEFT_EYE_INNER, 
    PoseLandmark.LEFT_EYE_OUTER, 
    PoseLandmark.LEFT_EAR, 
    PoseLandmark.LEFT_PINKY,
    PoseLandmark.LEFT_EAR,
    PoseLandmark.LEFT_PINKY,
    PoseLandmark.LEFT_INDEX,
    PoseLandmark.LEFT_THUMB,
    PoseLandmark.LEFT_FOOT_INDEX,
    PoseLandmark.RIGHT_EYE_INNER, 
    PoseLandmark.RIGHT_EYE_OUTER, 
    PoseLandmark.RIGHT_EAR, 
    PoseLandmark.RIGHT_PINKY,
    PoseLandmark.RIGHT_EAR,
    PoseLandmark.RIGHT_PINKY,
    PoseLandmark.RIGHT_INDEX,
    PoseLandmark.RIGHT_THUMB,
    PoseLandmark.RIGHT_FOOT_INDEX,
]

In [524]:
len(excluded_landmarks)

18

In [525]:
for landmark in excluded_landmarks:
    # we change the way the excluded landmarks are drawn
    custom_style[landmark] = DrawingSpec(color=(255,0,0), thickness=None, circle_radius=2)
    # we remove all connections which contain these landmarks
    custom_connections = [connection_tuple for connection_tuple in custom_connections 
                            if landmark.value not in connection_tuple]

In [526]:
# custom_style

In [527]:
# custom_connections

In [528]:
pose = mp_pose.Pose(model_complexity = 2, 
                    min_detection_confidence = 0.5, 
                    min_tracking_confidence = 0.5)

I0000 00:00:1726812223.976020 3603984 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M3 Pro
W0000 00:00:1726812224.041452 3686473 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726812224.061651 3686483 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [501]:
keypoint_names = [
    'nose',
    'left_eye_inner', 'left_eye', 'left_eye_outer',
    'right_eye_inner', 'right_eye', 'right_eye_outer',
    'left_ear', 'right_ear',
    'mouth_left', 'mouth_right',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_pinky', 'right_pinky',
    'left_index', 'right_index',
    'left_thumb', 'right_thumb',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
    'left_heel', 'right_heel',
    'left_foot_index','right_foot_index',
]

W0000 00:00:1726812107.500290 3685430 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726812107.523197 3685440 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [502]:
keypoint_names_orientation = [
    'nose',
    'left_eye', 'right_eye',
    'left_shoulder', 'right_shoulder'
]

In [503]:
keypoint_names_body = [
    'nose',
    'left_eye', 'right_eye',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
    'left_heel', 'right_heel',
]

In [507]:
def handle_video(fname_video_in=None, fname_video_out=None, fps_out=10):

    video_in = cv2.VideoCapture(fname_video_in)

    fps_in = int( np.round( video_in.get( cv2.CAP_PROP_FPS ) ) )
    skip   = int( fps_in/fps_out )

    w_frame = int(video_in.get(3)) 
    h_frame = int(video_in.get(4))
    size = (w_frame, h_frame)    
    
    num_frames = int(video_in.get(cv2.CAP_PROP_FRAME_COUNT))

    print('Reading video data from {}...\n(frame_w, frame_h) = {}; num_frames: {}\n********'.format(
        fname_video_in, size, num_frames))

    video_out = cv2.VideoWriter(fname_video_out, 
                                cv2.VideoWriter_fourcc(*'mp4v'), 
                                fps_out, 
                                size)

    print('Processing video...')
    fnum2landmarks = {}
    for fnum in tqdm( range(num_frames) ):
    
        ret, frame = video_in.read()

        if ret:
            if fnum % skip == 0:

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # pass frame by reference for optimization
                frame_rgb.flags.writeable = False
                results = pose.process(frame_rgb)

                # only measure landmarks if detected
                if results.pose_landmarks is not None:
                
                    landmarks = {}
                    for i, lm in enumerate( results.pose_landmarks.landmark ):
                        if keypoint_names[i] in keypoint_names_orientation:
                            landmarks[keypoint_names[i]] = np.asarray( [ int(w*lm.x), int(h*lm.y) ] )
                    fnum2landmarks[fnum] = landmarks

                    # make frame writeable for drawing
                    frame_rgb.flags.writeable = True
                    mp_drawing.draw_landmarks(
                        frame_rgb,
                        landmark_list= results.pose_landmarks,
                        # # defaults
                        # mp_pose.POSE_CONNECTIONS,
                        # landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
                        connections = custom_connections, #  passing the modified connections list
                        landmark_drawing_spec = custom_style, # and drawing style
                    )

                # write out to output video file
                video_out.write( cv2.cvtColor( frame_rgb, cv2.COLOR_RGB2BGR) )

        # break out of loop if end of video has been reached
        else:
            print('Saving results to {}'.format(fname_video_out))
            break
    
    # release handles
    video_in.release() 
    video_out.release() 
    
    return fnum2landmarks

In [535]:
fname_video_in  = './raw_videos/video_id1.mp4'
fname_video_out = './processed_videos/video_id1.mp4'
fps_out = 15

In [536]:
fnum2landmarks = handle_video(fname_video_in=fname_video_in,
                              fname_video_out=fname_video_out,
                              fps_out=fps_out)

Reading video data from ./raw_videos/video_id1.mp4...
(frame_w, frame_h) = (1920, 1080); num_frames: 2401
********
Processing video...


  <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_file-attachment-element_dist_index_js-node_modules_primer_view-co-278f98-614627bd58c2.js"></script>
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2401/2401 [01:15<00:00, 31.94it/s]


## Calculate Orientation of Baby

In [531]:
def get_order(eyes, nose, shoulders, axis=0):
    order = {'E': eyes, 'N': nose, 'S': shoulders}
    order_sortByAxis = {k: v for k, v in sorted(order.items(), key=lambda item: item[1][axis])}        
    return '{}_{}'.format(''.join(list(order_sortByAxis.keys())), axis)

In [532]:
def get_orientation(fnum2landmarks):

    order_votes_x = []
    order_votes_y = []
    
    for fnum, landmarks in fnum2landmarks.items():
    
        ([nose_x, nose_y], 
         [left_eye_x, left_eye_y], 
         [right_eye_x, right_eye_y], 
         [left_shoulder_x, left_shoulder_y], 
         [right_shoulder_x, right_shoulder_y]) = landmarks.values()
    
        nose = [ nose_x, nose_y ]
        eyes = [ (left_eye_x+right_eye_x)/2 , (left_eye_y+right_eye_y)/2 ]
        shoulders = [ (left_shoulder_x+right_shoulder_x)/2 , (left_shoulder_y+right_shoulder_y)/2 ]
    
        order_votes_x.append( get_order(eyes, nose, shoulders, axis=0) )
        order_votes_y.append( get_order(eyes, nose, shoulders, axis=1) )
    
    order_votes = order_votes_x + order_votes_y

    # print(pd.Series(order_votes).value_counts())
    
    dominant_order = pd.Series(order_votes).value_counts().index[0]
    
    order2orientation = {
        'ENS_1': 'Up',
        'SNE_1': 'Down',
        'ENS_0': 'Left',
        'SNE_0': 'Right',
    }
    
    return order2orientation[dominant_order]

In [533]:
orientation = get_orientation(fnum2landmarks)

In [534]:
orientation

'Left'

In [None]:
df_meta = pd.read_csv('./list_of_subjects_726_withFNAMES.csv')

In [None]:
df_meta

In [None]:
fname_data = df_meta.iloc[-1]['fname_data']

In [None]:
fname_data

In [None]:
df_data = pd.read_csv( fname_data )

In [None]:
df_data

In [None]:
df_data.shape