# Google MediaPipe
Google's [MediaPipe](https://google.github.io/mediapipe/) is currently (as of 26.12.2020) in alpha state.

It can be installed on Jetson Nano by following the [given instructions](https://google.github.io/mediapipe/getting_started/install.html#installing-on-debian-and-ubuntu).
The Python package needs to be built on the Jetson Nano itself, following [these instructions](https://google.github.io/mediapipe/getting_started/python.html#building-mediapipe-python-package).

TLDR:
```bash
# First install Bazel 3.4.1
mkdir $HOME/bazel-3.4.1
cd $HOME/bazel-3.4.1
wget https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-dist.zip
sudo apt-get install build-essential openjdk-8-jdk python zip unzip
unzip bazel-3.4.1-dist.zip
env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
sudo cp output/bazel /usr/local/bin/

# Then the MediaPipe
cd $HOME
git clone https://github.com/google/mediapipe.git
cd mediapipe
# if you have not updated OpenCV to 4.X (compiled from source)
sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD
# else
sed -i "s/x86_64-linux-gnu//g" third_party/opencv_linux.BUILD
sed -i "s/include/include\/opencv4/g" third_party/opencv_linux.BUILD
# then
sudo -H pip3 install -r requirements.txt
python3 setup.py gen_protos
sudo -H python3 setup.py install --link-opencv
```

Performance of the models is moderately slow:
```
holistic ~ 3.5 fps
hands ~ 8.5 fps
pose ~ 6.5 fps
face ~ 14 fps
```

Memory-wise this model is superior to the others I have tested so far, with the whole model taking only ~1 GB in total.


In [None]:
import cv2
import traitlets

import numpy as np
import mediapipe as mp

from jetutils import SimpleTimer


In [None]:
# Module-level shorthand for MediaPipe's drawing helpers (draw_landmarks, DrawingSpec).
mp_drawing = mp.solutions.drawing_utils
class MediaPipeAdapter(traitlets.HasTraits):
    """Run a selectable MediaPipe solution on BGR frames and publish the result.

    One model instance per solution ('holistic', 'hands', 'face', 'pose') is
    created up front; ``solution_name`` selects which one ``process_frame``
    uses. The annotated image is published through the ``output_frame`` trait
    so that observers (for example an image widget) can follow it.
    """

    # Most recent annotated frame; assigned at the end of every process_frame call.
    output_frame = traitlets.Any()
    # True: draw landmarks over a copy of the input frame; False: over a black canvas.
    draw_original = traitlets.Bool(default_value=True)
    # Key into the model / draw tables built in __init__.
    solution_name = traitlets.Unicode(default_value='holistic')

    def __init__(self):
        """Create the timers, every MediaPipe model and the draw dispatch table."""
        super().__init__()
        self._timer = SimpleTimer()
        self._full_timer = SimpleTimer()
        self._model = {
            'holistic': mp.solutions.holistic.Holistic(smooth_landmarks=True,
                                                       min_detection_confidence=0.4,
                                                       min_tracking_confidence=0.5),
            'hands': mp.solutions.hands.Hands(max_num_hands=2,
                                              min_detection_confidence=0.4,
                                              min_tracking_confidence=0.5),
            'face': mp.solutions.face_mesh.FaceMesh(min_detection_confidence=0.4,
                                                    min_tracking_confidence=0.5),
            'pose': mp.solutions.pose.Pose(min_detection_confidence=0.4,
                                           min_tracking_confidence=0.5),
        }
        # Same keys as self._model: maps a solution name to its drawing routine.
        self._draw = {
            'holistic': self._draw_holistic,
            'hands': self._draw_hands,
            'face': self._draw_face,
            'pose': self._draw_pose,
        }

        # Thin lines / small dots, used for the face mesh only.
        self._drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
        self.output_frame = np.zeros((480, 640, 3), dtype=np.uint8)

    def __del__(self):
        """Close every model that was created.

        ``_model`` may be missing when ``__init__`` failed part-way, so it is
        looked up defensively instead of raising from the finalizer.
        """
        for model in getattr(self, '_model', {}).values():
            model.close()

    def _draw_holistic(self, frame, results):
        """Draw face, both hands and pose landmarks of a holistic result onto ``frame``."""
        holistic = mp.solutions.holistic
        layers = (
            (results.face_landmarks, holistic.FACE_CONNECTIONS),
            (results.left_hand_landmarks, holistic.HAND_CONNECTIONS),
            (results.right_hand_landmarks, holistic.HAND_CONNECTIONS),
            (results.pose_landmarks, holistic.POSE_CONNECTIONS),
        )
        for landmarks, connections in layers:
            mp_drawing.draw_landmarks(frame, landmarks, connections)

    def _draw_hands(self, frame, results):
        """Draw every detected hand onto ``frame``; does nothing when none was found."""
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)

    def _draw_face(self, frame, results):
        """Draw every detected face mesh onto ``frame`` using the thin drawing spec."""
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp_drawing.draw_landmarks(frame, face_landmarks,
                                          mp.solutions.face_mesh.FACE_CONNECTIONS,
                                          landmark_drawing_spec=self._drawing_spec,
                                          connection_drawing_spec=self._drawing_spec)

    def _draw_pose(self, frame, results):
        """Draw the pose landmarks of ``results`` onto ``frame``."""
        mp_drawing.draw_landmarks(frame,
                                  results.pose_landmarks, mp.solutions.pose.POSE_CONNECTIONS)

    @staticmethod
    def _pose_rows(pose_landmarks, names, width, height):
        """Return one text row per pose landmark: name, pixel x/y and visibility in percent.

        ``names`` is the landmark enum used to turn an index into a name.
        """
        return ''.join(
            '{:<20} x:{}\t y:{}\t visibility %:{}\n'.format(
                names(index).name.lower(),
                int(landmark.x * width),
                int(landmark.y * height),
                int(landmark.visibility * 100))
            for index, landmark in enumerate(pose_landmarks.landmark))

    @staticmethod
    def _hand_rows(hand_landmarks, width, height):
        """Return one text row per hand landmark: name, pixel x/y and scaled depth."""
        names = mp.solutions.holistic.HandLandmark
        return ''.join(
            '{:<20} x:{}\t y:{}\t z(depth):{}\n'.format(
                names(index).name.lower(),
                int(landmark.x * width),
                int(landmark.y * height),
                # z is scaled by the width, exactly as x is.
                int(landmark.z * width))
            for index, landmark in enumerate(hand_landmarks.landmark))

    def _dbgstr(self, results, width=640, height=480, solution_name=None):
        """Format the landmarks in ``results`` as a human-readable text report.

        Args:
            results: Object returned by the model's ``process`` call.
            width: Frame width in pixels used to scale x (and z) values.
            height: Frame height in pixels used to scale y values.
            solution_name: Solution that produced ``results``; defaults to the
                current ``self.solution_name``.

        Returns:
            The report text, or ``''`` for an unknown solution name.
        """
        name = self.solution_name if solution_name is None else solution_name

        if name == 'holistic':
            face = results.face_landmarks
            parts = ['Landmarks:\n',
                     'face: {} landmarks\n\n'.format(len(face.landmark)) if face
                     else 'face: NONE\n\n']
            if results.pose_landmarks:
                parts.append(self._pose_rows(results.pose_landmarks,
                                             mp.solutions.holistic.PoseLandmark,
                                             width, height))
                parts.append('\n\n')
            hands = (('left hand', results.left_hand_landmarks),
                     ('right hand', results.right_hand_landmarks))
            for label, hand in hands:
                if hand:
                    parts.append('{}:\n'.format(label))
                    parts.append(self._hand_rows(hand, width, height))
                    parts.append('\n\n')
                else:
                    parts.append('{}: NONE\n\n'.format(label))
            return ''.join(parts)

        if name == 'hands':
            detected = results.multi_hand_landmarks
            if not detected:
                return 'hands NONE\n'
            parts = ['hands {}\n\n'.format(len(detected))]
            for number, hand in enumerate(detected):
                parts.append('hand {}\n'.format(number))
                parts.append(self._hand_rows(hand, width, height))
                parts.append('\n\n')
            return ''.join(parts)

        if name == 'face':
            faces = results.multi_face_landmarks
            return 'faces {}\n'.format(len(faces)) if faces else 'faces NONE\n'

        if name == 'pose':
            rows = ''
            if results.pose_landmarks:
                rows = self._pose_rows(results.pose_landmarks,
                                       mp.solutions.pose.PoseLandmark,
                                       width, height)
            return 'Landmarks:\n' + rows

        return ''

    def process_frame(self, frame):
        """Run the selected model on ``frame`` and publish the annotated image.

        Args:
            frame: BGR image array; it is converted to uint8 before use.

        Returns:
            A text report with the model name, fps, model and draw timings,
            followed by the landmark dump from ``_dbgstr``.

        Raises:
            KeyError: If ``solution_name`` is not one of the configured models.
        """
        frame = frame.astype(np.uint8)
        height, width = frame.shape[:2]
        outframe = frame.copy() if self.draw_original else np.zeros_like(frame)

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame.flags.writeable = False  # pass by reference - should be faster

        # Read the trait once so model, drawing and report all use the same
        # solution even if the selection is changed while this frame is processed.
        name = str(self.solution_name)

        with self._full_timer:
            with self._timer:
                results = self._model[name].process(frame)
            model_time = self._timer.time
            frame.flags.writeable = True
            with self._timer:
                self._draw[name](outframe, results)
            draw_time = self._timer.time
        self.output_frame = outframe
        return 'model {}\nfps {}\nmodel time {}\ndraw time {}\n\n{}'.format(
            name,
            self._full_timer.fps,
            model_time,
            draw_time,
            self._dbgstr(results, width, height, name))

# Single adapter instance shared by the cells below.
# NOTE: the name `mediapipe` refers to this adapter object; the MediaPipe
# package itself was imported as `mp`.
mediapipe = MediaPipeAdapter()

In [None]:
from jetutils import GstCamera, bgr8_to_jpeg
# Camera object shared by the cells below; created with GstCamera's default settings.
camera = GstCamera()

In [None]:
# Smoke test: run every configured model once on a freshly read camera frame.
# The previously selected solution is restored afterwards so the adapter does
# not stay on whichever model happened to be tested last.
_previous_solution = mediapipe.solution_name
try:
    for model in mediapipe._model:
        mediapipe.solution_name = model
        print(mediapipe.process_frame(camera.read()))
        print('-'*50)
finally:
    mediapipe.solution_name = _previous_solution
    

In [None]:
import ipywidgets
from IPython.display import display
from sidecar import Sidecar

# Two JPEG image widgets sized like the camera: raw feed and annotated output.
# Both start out showing a black frame, which is encoded only once.
_blank_jpeg = bgr8_to_jpeg(np.zeros((camera.height, camera.width, 3), dtype=np.uint8))
image_original = ipywidgets.Image(format='jpeg', width=camera.width, height=camera.height)
image_processed = ipywidgets.Image(format='jpeg', width=camera.width, height=camera.height)
image_original.value = _blank_jpeg
image_processed.value = _blank_jpeg

# Read-only text area that shows the report returned by process_frame.
debug_out = ipywidgets.Textarea(value='',
                                disabled=True,
                                layout=ipywidgets.Layout(width='640px', height='1200px'))
images_out = ipywidgets.HBox([image_original, image_processed])

# Model selector and the "draw over original frame" switch.
select_outmode = ipywidgets.ToggleButtons(options=['holistic', 'hands', 'pose', 'face'],
                                          value='holistic', description='model ',
                                          style={'description_width': 'initial'})
# The style key was misspelled ('description_wifth'), so it never took effect.
select_original = ipywidgets.ToggleButton(value=True, description='over orginal',
                                          style={'description_width': 'initial'})

control_box = ipywidgets.HBox([select_outmode, select_original])
all_box = ipywidgets.VBox([images_out, control_box, debug_out])

# Show the whole UI in a sidecar panel titled 'output'.
_sidecar = Sidecar(title='output')
with _sidecar:
    display(all_box)

In [None]:
# One-way links (source -> target):
#   camera frame     -> left image widget (JPEG-encoded)
#   annotated frame  -> right image widget (JPEG-encoded)
traitlets.dlink((camera, 'value'), (image_original, 'value'), transform=bgr8_to_jpeg)
traitlets.dlink((mediapipe,'output_frame'), (image_processed, 'value'), transform=bgr8_to_jpeg)
# The model selector is intentionally not linked directly; change_model below
# handles it so the camera can be paused during the switch.
# traitlets.dlink((select_outmode, 'value'), (mediapipe, 'solution_name'))
#   toggle button    -> adapter's draw_original flag
traitlets.dlink((select_original, 'value'), (mediapipe, 'draw_original'))

def process(change):
    """Camera observer: process the new frame and show its report in the debug area."""
    new_frame = change['new']
    report = mediapipe.process_frame(new_frame)
    debug_out.value = report
    
def change_model(change):
    """Model-selector observer: switch the adapter to the newly chosen solution.

    The camera is paused while the model is switched to prevent occasional
    hangs. Its previous running state is restored afterwards, also when the
    switch raises, so a failed switch does not leave the camera stopped and a
    stopped camera is not started as a side effect.
    """
    was_running = camera.running
    camera.running = False
    try:
        mediapipe.solution_name = change['new']
    finally:
        camera.running = was_running
    
# Register the callbacks first, then start the camera, so that `process` is
# already observing `camera.value` when capturing begins.
camera.observe(process, names='value')
select_outmode.observe(change_model, names='value')
camera.running = True

In [None]:
# Shut down: remove every observer registered on the camera, then stop capturing.
# NOTE(review): the observer on select_outmode is left in place — confirm that is intended.
camera.unobserve_all()
camera.running = False

In [None]:
# Drop the notebook's reference to the adapter; MediaPipeAdapter.__del__ closes
# the models once the object is actually collected.
# NOTE(review): the dlinks created earlier may still hold references — confirm it is freed.
del mediapipe

In [None]:
# Drop the notebook's reference to the camera object.
# NOTE(review): presumably this releases the capture device once collected — confirm against jetutils.GstCamera.
del camera