# Body tracking with mediapipe/Blazepose
The following code loops through all the videos and extracts body poses using the MediaPipe package (ref 1: Mediapipe Body tracking), which utilizes BlazePose for body pose estimation (ref 2: Bazarevsky et al., 2020). The heavier BlazePose models (e.g., "the full model") that we are using here have been found to outperform OpenPose on yoga/fitness tracking videos, but the kicker lies in its speed: it is 25 to 75 times faster than OpenPose when running on a CPU (see ref 2).


### Questions: Wim Pouw (wim.pouw@donders.ru.nl) & James Trujillo (james.trujillo@donders.ru.nl)

In [35]:
#https://www.youtube.com/watch?v=NZde8Xt78Iw
%config Completer.use_jedi = False
import cv2
import mediapipe
import pandas as pd
import numpy as np
import csv
import os

# Short aliases for the two MediaPipe sub-modules this notebook uses:
# the landmark drawing helpers and the pose-estimation solution.
drawingModule = mediapipe.solutions.drawing_utils
poseModule = mediapipe.solutions.pose

In [36]:
from os import listdir
from os.path import join, isfile
# Input folder holding the videos, and output folder for the time series CSVs
videopath = "./Multimedia/Video/"
foldtime = "./Timeseries_Output/"

# Collect the names of all regular files (sub-folders are skipped) in the video folder
onlyfiles = []
for entry in listdir(videopath):
    if isfile(join(videopath, entry)):
        onlyfiles.append(entry)

In [37]:
############Initializing some functions and variables

#turn a (google/mediapipe) result object into a list of its printed lines
def makegoginto_str(gogobj):
    """Return the string form of *gogobj* split into lines, without the last line.

    The object is converted with ``str()``, leading/trailing square brackets
    are stripped, and the text is split on newlines. The final piece of the
    split is always discarded (for text ending in a newline it is empty).
    """
    text = str(gogobj).strip("[]")
    *kept_lines, _discarded = text.split("\n")
    return kept_lines

#landmarks 33x, listed in MediaPipe Pose index order (0 = NOSE ... 32 = RIGHT_FOOT_INDEX).
#The order matters: the CSV header is built from this list and must line up with the
#order in which the tracked landmark values are written.
#Index 4 is RIGHT_EYE_INNER (it was previously a duplicate 'RIGHT_EYE_OUTER').
markers = ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER',
          'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW',
          'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX',
          'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE',
          'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']

#tell whether a string contains at least one digit
def num_there(s):
    """Return True if any element of *s* is a digit character, else False."""
    for ch in s:
        if ch.isdigit():
            return True
    return False

#make the stringifyd position traces into clean values
def listpostions(newsamplemarks):
    """Extract the value part of every "name: value" line that contains a digit.

    Parameters
    ----------
    newsamplemarks : iterable of str
        Lines of a stringified landmark result, e.g. ``"  x: 0.5"``.
        Lines without any digit (such as ``"landmark {"`` or ``"}"``) are skipped.

    Returns
    -------
    list of str
        For each kept line, the text after the first ``':'`` with surrounding
        whitespace removed, in input order.

    Raises
    ------
    IndexError
        If a line contains a digit but no ``':'``.
    """
    tracking_p = []
    # Iterate over the argument itself; the previous version read a misspelled
    # name that only resolved when a global of that name happened to exist.
    for value in newsamplemarks:
        # only lines carrying a number hold a coordinate/visibility value
        if any(ch.isdigit() for ch in value):
            stripped = value.split(':', 1)[1]
            tracking_p.append(stripped.strip()) #remove spaces around the value
    return tracking_p

In [38]:
# For every video: run pose tracking frame by frame, save an annotated copy of the
# video, and write one CSV row per tracked frame (time + 4 values per landmark).
for ff in onlyfiles:
    # Drop the extension whatever its length (".mp4", ".mpeg", ...) instead of
    # assuming it is always 4 characters long.
    basename = os.path.splitext(ff)[0]

    capture = cv2.VideoCapture(videopath+ff)
    frameWidth = capture.get(cv2.CAP_PROP_FRAME_WIDTH)
    frameHeight = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = capture.get(cv2.CAP_PROP_FPS)
    print(frameWidth, frameHeight, fps)

    #pose tracking with keypoints save!
    #make a video file with the same frame rate and size as the input video
    samplerate = fps
    fourcc = cv2.VideoWriter_fourcc(*'MP4V') #(*'XVID')
    out = cv2.VideoWriter('Videotracking_output/'+basename+'.mp4', fourcc, fps = samplerate, frameSize = (int(frameWidth), int(frameHeight)))

    #header row: 'time' followed by X_/Y_/Z_/visibility_ columns for each marker,
    #in the same order as the values are appended per frame
    markerxyz = [pos + "_" + mark for mark in markers for pos in ['X', 'Y', 'Z', 'visibility']]
    addvariable = ['time']
    addvariable.extend(markerxyz)

    time = 0 #elapsed time in milliseconds, advanced by 1000/fps per frame read
    timeseries = [addvariable]
    #MAIN ROUTINE
    try:
        with poseModule.Pose(min_detection_confidence=0.5, model_complexity = 1, min_tracking_confidence=0.75, smooth_landmarks = True) as pose:
            while True:
                ret, frame = capture.read()
                if not ret: #no more frames (or the file could not be read)
                    break
                results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if results.pose_landmarks is not None:
                    #the saved values come from the world landmarks; the image
                    #landmarks are only used for drawing on the frame
                    newsamplelmarks = makegoginto_str(results.pose_world_landmarks)
                    newsamplelmarks = listpostions(newsamplelmarks)
                    fuldataslice = [str(time)]
                    fuldataslice.extend(newsamplelmarks) #add positions
                    timeseries.append(fuldataslice) #append to the timeseries data
                    drawingModule.draw_landmarks(frame, results.pose_landmarks, poseModule.POSE_CONNECTIONS)
                cv2.imshow('MediaPipe Pose', frame)
                out.write(frame)  ################################################comment this if you dont want to make a video
                time = time+(1000/samplerate)
                if cv2.waitKey(1) == 27: #ESC stops tracking of the current video
                    break
    finally:
        #always free the writer, the capture and the preview window, even if tracking fails
        out.release()
        capture.release()
        cv2.destroyAllWindows()

    ####################################################### data to be written row-wise in csv file
    data = timeseries
    #open inside the with-statement so the file is closed even if writing fails
    with open(foldtime + basename + '.csv', 'w', newline='') as file:
        write = csv.writer(file)
        write.writerows(data)

250.0 480.0 30.0
