# Исследование методов классификации движений человека

## Описание эксперимента

В ноутбуке проведено исследование нескольких моделей на распознавание ключевых точек на теле человека.  
Данные были взяты из датасета:
  - LSP
  - LSPE

Для разных датасетов разная предобработка аннотаций к изображениям.

Модели:  
  - BlazePose
  - MoveNet
  - OpenPose
  - MMPose

Способы оценки моделей:  
  - PCK
  - PDJ
  - AP с OKS в качестве метрики корректности

Трешхолды для метрик PDJ и PCK: 0.05, 0.2, 0.5.  
Трешхолды для метрикb AP: 0.5, 0.75, 0.5:0.95:0.05.
  

In [None]:
!pip install tensorflow
!pip install mediapipe
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs
!python -m pip install pyyaml==5.1

In [2]:
# All imports
import cv2
import math
import numpy as np
from google.colab.patches import cv2_imshow

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# sys utils
import os 
import shutil
import sys
import warnings
import time

# First model
import mediapipe as mp

# Second model
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

# Next models will be load from git repositories and will be import there

### Загрузка данных

In [None]:
from google.colab import files, drive
# files.upload()
drive.mount('/content/drive')
%cp -av '/content/drive/MyDrive/Диплом/LSPE/' '/content/data'
PATH = "/content/data/"

# можно загрузить данные на гугл диск и оттуда их подгружать в /content/
# Директория должны выглядеть:
# - /data
# |- data.txt
# |- joints.mat
# |- /images
#    |- 01.jpg
#    |- 02.jpg
#    ...

In [4]:
from scipy.io import loadmat
joints = loadmat(PATH + 'joints.mat')['joints']
joints

array([[[  0.        ,  75.71063741, 149.31389391, ..., 154.71535102,
          49.67604825, 118.06726708],
        [-26.10911452,  49.4939424 , 195.50574922, ..., 133.20360364,
          17.03758838,  23.92387506],
        [  0.        ,   1.        ,   1.        , ...,   1.        ,
           1.        ,   1.        ]],

       [[ 26.03094352,  83.65509044, 120.93402709, ..., 160.73603675,
          37.24532382,  97.11380604],
        [ 86.41022512,  63.1054386 , 178.16249727, ..., 115.75988716,
          41.3056107 ,  31.04216451],
        [  1.        ,   1.        ,   1.        , ...,   1.        ,
           1.        ,   1.        ]],

       [[ 50.60790641,  86.64750109, 156.43016682, ..., 187.84539463,
          52.84619531,  63.68995822],
        [ 75.70079791,  67.02470209, 155.32224409, ..., 116.96402431,
          33.21626993,  31.04216451],
        [  1.        ,   1.        ,   1.        , ...,   1.        ,
           1.        ,   1.        ]],

       ...,

       [[

In [5]:
real_data = []
for i in range(len(joints[0][0])):
    photo = []
    for j in range(12):
        # photo.append([joints[0,j,i], joints[1,j,i], joints[2,j,i]]) # for LSP
        photo.append([joints[j,0,i], joints[j,1,i], joints[j,2,i]]) # for LSPE
    real_data.append(photo)

real_data = np.array(real_data)

In [6]:
real_data

array([[[  0.        , -26.10911452,   0.        ],
        [ 26.03094352,  86.41022512,   1.        ],
        [ 50.60790641,  75.70079791,   1.        ],
        ...,
        [102.38837784,  68.46216316,   1.        ],
        [110.78394341,  88.42703696,   1.        ],
        [139.12874857,  89.58396778,   1.        ]],

       [[ 75.71063741,  49.4939424 ,   1.        ],
        [ 83.65509044,  63.1054386 ,   1.        ],
        [ 86.64750109,  67.02470209,   1.        ],
        ...,
        [ 89.93120834,  89.63991173,   1.        ],
        [122.74179937, 100.44436785,   1.        ],
        [123.72161524, 136.1944065 ,   1.        ]],

       [[149.31389391, 195.50574922,   1.        ],
        [120.93402709, 178.16249727,   1.        ],
        [156.43016682, 155.32224409,   1.        ],
        ...,
        [111.47407148, 145.86228848,   1.        ],
        [120.93402709, 166.31624655,   1.        ],
        [119.35736782, 177.39547384,   1.        ]],

       ...,

      

### Высчитывание метрик

In [7]:
KEYPOINTS = {
    "right ankle": 0,
    "right knee": 1,
    "right hip": 2,
    "left hip": 3,
    "left knee": 4,
    "left ankle": 5,
    "right wrist": 6,
    "right elbow": 7,
    "right shoulder": 8,
    "left shoulder": 9,
    "left elbow": 10,
    "left wrist": 11,
}

In [8]:
# Calculate metrics

# Size calculations

def euclidian_metric(a, b):
    res = np.array(a) - np.array(b)
    return np.sqrt(np.sum(res ** 2))

def calc_diag(points):
    x_min = min(points[:, 0])
    x_max = max(points[:, 0])
    y_min = min(points[:, 1])
    y_max = max(points[:, 1])
    return euclidian_metric([x_min, y_min], [x_max, y_max])

def calc_height(points):
    y_min = min(points[:, 1])
    y_max = max(points[:, 1])
    return y_max - y_min

def calc_square(points):
    x_min = min(points[:, 0])
    x_max = max(points[:, 0])
    y_min = min(points[:, 1])
    y_max = max(points[:, 1])
    return (x_max - x_min) * (y_max - y_min)

# Calculate PCK and PDJ

def calc_pck(pred, real, threshold):
    height = calc_height(real)
    sum = 0
    for p, r in zip(pred, real):
        dist = euclidian_metric(p[:2], r[:2])
        if dist < threshold * height:
            sum += 1
    return sum / len(real)

def calc_pdj(pred, real, threshold):
    diag = calc_diag(real)
    sum = 0
    for p, r in zip(pred, real):
        dist = euclidian_metric(p[:2], r[:2])
        if dist < threshold * diag:
            sum += 1
    return sum / len(real)

# Calculate AP and mAP

KEYPOINTS_OKS = {
    "right ankle": 0.089,
    "right knee": 0.087,
    "right hip": 0.107,
    "left hip": 0.107,
    "left knee": 0.087,
    "left ankle": 0.089,
    "right wrist": 0.062,
    "right elbow": 0.072,
    "right shoulder": 0.079,
    "left shoulder": 0.079,
    "left elbow": 0.072,
    "left wrist": 0.062,
}

def calc_oks(pred, real):
    square = calc_square(real)
    sum = 0
    for k, p, r in zip(KEYPOINTS_OKS.values(), pred, real):
        dist = euclidian_metric(p[:2], r[:2])
        if r[2] > 0:
            sum += np.exp( - dist * dist / (2 * square * k * k))
    
    return sum / np.sum(np.array(real)[:,2] > 0)

def calc_AP(threshold, oks):
    n = len(oks)
    corrects = [np.array(oks) > threshold]
    recalls = []
    precisions = []
    right_n = np.sum(corrects)
    for ind in range(n):
        precisions.append(np.sum(corrects[:i]) / i)
        recalls.append(np.sum(corrects[:i]) / right_n)

    d = {}
    d[1] = 0
    for r, p in zip(recalls, precisions):
        x_min = min(d.keys())
        if ((r in d.keys()) and (d[r] < p)) or ((not r in d.keys()) and (d[x_min] < p)):
            d[r] = p
    d[0] = 1

    step_size = 1 / n
    sorted_keys = sorted(d.keys())
    sum = 0
    for r_min, r_max in zip(sorted_keys[:-1], sorted_keys[1:]):
        sum += round((r_max - r_min) / step_size, 1) * d[r_max]

    return (sum + 1) / (n + 1)

def calc_mAP(oks):
    APs = []
    for threshold in np.arange(0.5, 1, 0.05):
        APs.append(calc_AP(threshold, oks))
    
    return np.mean(APs)

### Первая модель: BlazePose by MediaPipe

In [9]:
with open(PATH + 'data.txt') as f:
    names = f.read().split('\n')[:-1]

images = {name: cv2.imread(PATH + 'images/' + name) for name in names}

In [10]:
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles

# help(mp_pose.Pose)

In [11]:
KEYPOINT_DICT = {
    "right ankle": mp_pose.PoseLandmark.RIGHT_ANKLE,
    "right knee": mp_pose.PoseLandmark.RIGHT_KNEE,
    "right hip": mp_pose.PoseLandmark.RIGHT_HIP,
    "left hip": mp_pose.PoseLandmark.LEFT_HIP,
    "left knee": mp_pose.PoseLandmark.LEFT_KNEE,
    "left ankle": mp_pose.PoseLandmark.LEFT_ANKLE,
    "right wrist": mp_pose.PoseLandmark.RIGHT_WRIST,
    "right elbow": mp_pose.PoseLandmark.RIGHT_ELBOW,
    "right shoulder": mp_pose.PoseLandmark.RIGHT_SHOULDER,
    "left shoulder": mp_pose.PoseLandmark.LEFT_SHOULDER,
    "left elbow": mp_pose.PoseLandmark.LEFT_ELBOW,
    "left wrist": mp_pose.PoseLandmark.LEFT_WRIST,
}

In [12]:
results = []
time_landmarks = []
errors = 0

mp_real_data = []

with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, model_complexity=1) as pose:
    for ind, image in enumerate(images.values()):

        start = time.time()
        keypoints = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        end = time.time()

        image_height, image_width, _ = image.shape
        if not keypoints.pose_landmarks:
            continue
        
        coords = []
        for key in KEYPOINTS.keys():
            coords.append([
                     keypoints.pose_landmarks.landmark[KEYPOINT_DICT[key]].x * image_width,
                     keypoints.pose_landmarks.landmark[KEYPOINT_DICT[key]].y * image_height,
                     keypoints.pose_landmarks.landmark[KEYPOINT_DICT[key]].visibility
            ])
        
        results.append(np.array(coords))
        time_landmarks.append(end - start)
        mp_real_data.append(real_data[ind])

print(f"Errors: {errors} / {len(names)}\nPercentage of errors: {errors * 100 / len(names)} %")   

Errors: 0 / 9999
Percentage of errors: 0.0 %


In [13]:
# calculate metrics

pck = {}
pdj = {}

for threshold in [0.05, 0.2, 0.5]:
    pdjs = []
    pcks = []
    for pred, real in zip(results, mp_real_data):
        pdjs.append(calc_pdj(pred, real, threshold))
        pcks.append(calc_pck(pred, real, threshold))
    
    pck[threshold] = round(np.mean(pcks), 3)
    pdj[threshold] = round(np.mean(pdjs), 3)

oks = []
for pred, real in zip(results, mp_real_data):
    oks.append(calc_oks(pred, real))

AP = {}
for threshold in [0.5, 0.75]:
    AP[threshold] = round(calc_AP(threshold, oks), 3)

mAP = round(calc_mAP(oks), 3)

In [14]:
print(f"pck: {pck}")
print(f"pdj: {pdj}")
print(f"AP:  {AP}")
print(f"mAP: {mAP}")
print(f"Average time: {round(np.mean(time_landmarks), 3)}")

pck: {0.05: 0.454, 0.2: 0.743, 0.5: 0.825}
pdj: {0.05: 0.557, 0.2: 0.783, 0.5: 0.851}
AP:  {0.5: 0.675, 0.75: 0.457}
mAP: 0.443
Average time: 0.075


### Вторая модель: Move_Net.Singe_Pose.lightning by TensorFlowHub

In [None]:
model_name = "movenet_lightning"
module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")

def movenet(input_image):
    """Runs detection on an input image.

    Args:
        input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
        A [1, 1, 17, 3] float numpy array representing the predicted keypoint
        coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

In [None]:
KEYPOINT_DICT = {
    'nose': 0,
    'left eye': 1,
    'right eye': 2,
    'left ear': 3,
    'right ear': 4,
    'left shoulder': 5,
    'right shoulder': 6,
    'left elbow': 7,
    'right elbow': 8,
    'left wrist': 9,
    'right wrist': 10,
    'left hip': 11,
    'right hip': 12,
    'left knee': 13,
    'right knee': 14,
    'left ankle': 15,
    'right ankle': 16
}

# Maps bones to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the coordinates of all keypoints of all detected entities;
      * the coordinates of all skeleton edges of all detected entities;
      * the colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  num_instances, _, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    keypoints_xy = np.zeros((0, 17, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary that defines the coordinates of the bounding box
      of the crop region in normalized coordinates (see the init_crop_region
      function below for more detail). If provided, this function will also
      draw the bounding box on the image.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot


In [None]:
# read data
with open(PATH + 'data.txt') as f:
    names = f.read().split('\n')[:-1]
# images = {name: tf.image.decode_jpeg(tf.io.read_file(PATH + "images/" + name)) for name in names}

In [None]:
input_size = 192
results = []
time_landmarks = []

for name in tqdm(names):
    image = tf.image.decode_jpeg(tf.io.read_file(PATH + "images/" + name))

    # Resize and pad the image to keep the aspect ratio and fit the expected size.
    input_image = tf.expand_dims(image, axis=0)
    input_image = tf.image.resize_with_pad(input_image, input_size, input_size)
    image_height, image_width, _ = image.shape

    # Run model inference.
    start = time.time()
    keypoints_with_scores = movenet(input_image)
    end = time.time()

    coords = []
    for key in KEYPOINTS.keys():
        coord = keypoints_with_scores[0,0,KEYPOINT_DICT[key]] * [image_height, image_width, 1]
        coords.append([coord[1], coord[0], coord[2]])

    results.append(np.array(coords))
    time_landmarks.append(end - start)

In [None]:
# calculate metrics

pck = {}
pdj = {}

for threshold in [0.05, 0.2, 0.5]:
    pdjs = []
    pcks = []
    for pred, real in zip(results, real_data):
        pdjs.append(calc_pdj(pred, real, threshold))
        pcks.append(calc_pck(pred, real, threshold))
    
    pck[threshold] = round(np.mean(pcks), 3)
    pdj[threshold] = round(np.mean(pdjs), 3)

oks = []
for pred, real in zip(results, real_data):
    oks.append(calc_oks(pred, real))

AP = {}
for threshold in [0.5, 0.75]:
    AP[threshold] = round(calc_AP(threshold, oks), 3)

mAP = round(calc_mAP(oks), 3)


In [None]:
print(f"pck: {pck}")
print(f"pdj: {pdj}")
print(f"AP:  {AP}")
print(f"mAP: {mAP}")
print(f"Average time: {round(np.mean(time_landmarks), 3)}")

pck: {0.05: 0.144, 0.2: 0.539, 0.5: 0.745}
pdj: {0.05: 0.227, 0.2: 0.635, 0.5: 0.806}
AP:  {0.5: 0.312, 0.75: 0.059}
mAP: 0.111
Average time: 0.009


### Третья модель: OpenPose

In [None]:
from os.path import exists, join, basename, splitext

git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949
  # install new CMake becaue of CUDA10
  !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz
  !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local
  # clone openpose
  !git clone -q --depth 1 $git_repo_url
  !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/g' openpose/CMakeLists.txt
  # install system dependencies
  !apt-get -qq install -y libatlas-base-dev 
  !apt-get -qq install -y libprotobuf-dev 
  !apt-get -qq install -y libleveldb-dev 
  !apt-get -qq install -y libsnappy-dev
  !apt-get -qq install -y libhdf5-serial-dev
  !apt-get -qq install -y protobuf-compiler
  !apt-get -qq install -y libgflags-dev
  !apt-get -qq install -y libgoogle-glog-dev
  # build openpose
  !cd openpose && rm -rf build || true && mkdir build && cd build && cmake -DBUILD_PYTHON=ON .. && make -j`nproc`
if not exists("openpose/images"):
    !mkdir openpose/images
%cd openpose

In [None]:
# Define directories path
OpenposeDir = '/content/openpose/'

images_list = []
for name in names:
    src = os.path.join(PATH, 'images', name)
    destination = os.path.join(OpenposeDir,'images',name)
    shutil.copy(src, destination)
    images_list.append(destination)

images_list

In [None]:
KEYPOINT_DICT = {
    "right ankle": 11,
    "right knee": 10,
    "right hip": 9,
    "left hip": 12,
    "left knee": 13,
    "left ankle": 14,
    "right wrist": 4,
    "right elbow": 3,
    "right shoulder": 2,
    "left shoulder": 5,
    "left elbow": 6,
    "left wrist": 7,
}

In [None]:
# Import general libraries
warnings.simplefilter(action='ignore', category=FutureWarning)

# Set Python Openpose Directory for python api (Important)
pyopenpose_dir = os.path.join(OpenposeDir,'build','python') # ex: '/content/openpose/build/python'
if pyopenpose_dir not in sys.path:
    sys.path.append(pyopenpose_dir)
from openpose import pyopenpose as op

# Custom Params (refer to openpose/include/openpose/flags.hpp for more parameters)
params = dict()
params["model_folder"] = os.path.join(OpenposeDir,'models')  # ex: '/content/openpose/models'

# Starting OpenPose
opWrapper = op.WrapperPython()
opWrapper.configure(params)
opWrapper.start()

results = []
time_landmarks = []
# Process Image
for i, image in enumerate(images_list):
    input_image = cv2.imread(image)
    start = time.time()
    datum = op.Datum()
    datum.cvInputData = input_image
    opWrapper.emplaceAndPop(op.VectorDatum([datum]))
    network_output = datum.poseKeypoints
    end = time.time()

    coords = []
    if not network_output is None:
        for key in KEYPOINTS.keys():
            sec_index = KEYPOINT_DICT[key]
            coords.append(network_output[0, sec_index])
    else:
        coords = np.zeros((14,3), dtype=np.float32)

    results.append(np.array(coords))
    time_landmarks.append(end - start)

In [None]:
# calculate metrics

pck = {}
pdj = {}

for threshold in [0.05, 0.2, 0.5]:
    pdjs = []
    pcks = []
    for pred, real in zip(results, real_data):
        pdjs.append(calc_pdj(pred, real, threshold))
        pcks.append(calc_pck(pred, real, threshold))
    
    pck[threshold] = round(np.mean(pcks), 3)
    pdj[threshold] = round(np.mean(pdjs), 3)

oks = []
for pred, real in zip(results, real_data):
    oks.append(calc_oks(pred, real))

AP = {}
for threshold in [0.5, 0.75]:
    AP[threshold] = round(calc_AP(threshold, oks), 3)

mAP = round(calc_mAP(oks), 3)


In [None]:
print(f"pck: {pck}")
print(f"pdj: {pdj}")
print(f"AP:  {AP}")
print(f"mAP: {mAP}")
print(f"Average time: {round(np.mean(time_landmarks), 3)}")

pck: {0.05: 0.362, 0.2: 0.524, 0.5: 0.633}
pdj: {0.05: 0.42, 0.2: 0.569, 0.5: 0.69}
AP:  {0.5: 0.538, 0.75: 0.362}
mAP: 0.362
Average time: 0.053


### Четвертая модель: MMPose by Open-MMLab

In [None]:
# check NVCC version
!nvcc -V

# check GCC version
!gcc --version

# check python in conda environment
!which python

%cd /content

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

/usr/local/bin/python


In [None]:
# install dependencies: (use cu111 because colab has CUDA 11.1)
%pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
%pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html

# install mmdet for inference demo
%pip install mmdet

# clone mmpose repo
%rm -rf mmpose
!git clone https://github.com/open-mmlab/mmpose.git
%cd mmpose

# install mmpose dependencies
%pip install -r requirements.txt

# install mmpose in develop mode
%pip install -e .

In [None]:
# Check Pytorch installation
import torch, torchvision

print('torch version:', torch.__version__, torch.cuda.is_available())
print('torchvision version:', torchvision.__version__)

# Check MMPose installation
import mmpose

print('mmpose version:', mmpose.__version__)

# Check mmcv installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

print('cuda version:', get_compiling_cuda_version())
print('compiler information:', get_compiler_version())

torch version: 1.10.0+cu111 True
torchvision version: 0.11.0+cu111
mmpose version: 0.27.0
cuda version: 11.1
compiler information: GCC 7.3


In [None]:
from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                         vis_pose_result, process_mmdet_results)
from mmdet.apis import inference_detector, init_detector

pose_config = 'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py'
pose_checkpoint = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'
det_config = 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'
det_checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

# initialize pose model
pose_model = init_pose_model(pose_config, pose_checkpoint)
# initialize detector
det_model = init_detector(det_config, det_checkpoint)

load checkpoint from http path: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth


Downloading: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth" to /root/.cache/torch/hub/checkpoints/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth


  0%|          | 0.00/243M [00:00<?, ?B/s]

load checkpoint from http path: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth


Downloading: "https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth" to /root/.cache/torch/hub/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [None]:
KEYPOINT_DICT = {
    "right ankle": 16,
    "right knee": 14,
    "right hip": 12,
    "left hip": 11,
    "left knee": 13,
    "left ankle": 15,
    "right wrist": 10,
    "right elbow": 8,
    "right shoulder": 6,
    "left shoulder": 5,
    "left elbow": 7,
    "left wrist": 9,
}

# "keypoints": [
#             "nose","left_eye","right_eye","left_ear","right_ear",
#             "left_shoulder","right_shoulder","left_elbow","right_elbow",
#             "left_wrist","right_wrist","left_hip","right_hip",
#             "left_knee","right_knee","left_ankle","right_ankle"
#         ]

In [None]:
with open(PATH + 'data.txt') as f:
    names = f.read().split('\n')[:-1]

results = []
time_landmarks = []

for name in names:
    name = PATH + 'images/' + name
    
    start = time.time()
    mmdet_results = inference_detector(det_model, name)
    person_results = process_mmdet_results(mmdet_results, cat_id=1)
    pose_results, returned_outputs = inference_top_down_pose_model(
        pose_model,
        name,
        person_results,
        bbox_thr=0.3,
        format='xyxy',
        dataset=pose_model.cfg.data.test.type)
    end = time.time()
    
    if pose_results:
        pose_results = pose_results[0]['keypoints']
    else:
        pose_results = np.zeros((17,3))

    coords = []
    for key in KEYPOINTS.keys():
        coords.append(pose_results[KEYPOINT_DICT[key]])

    results.append(np.array(coords))
    time_landmarks.append(end - start)

In [None]:
# calculate metrics

pck = {}
pdj = {}

for threshold in [0.05, 0.2, 0.5]:
    pdjs = []
    pcks = []
    for pred, real in zip(results, real_data):
        pdjs.append(calc_pdj(pred, real, threshold))
        pcks.append(calc_pck(pred, real, threshold))
    
    pck[threshold] = round(np.mean(pcks), 3)
    pdj[threshold] = round(np.mean(pdjs), 3)

oks = []
for pred, real in zip(results, real_data):
    oks.append(calc_oks(pred, real))

AP = {}
for threshold in [0.5, 0.75]:
    AP[threshold] = round(calc_AP(threshold, oks), 3)

mAP = round(calc_mAP(oks), 3)


In [None]:
print(f"pck: {pck}")
print(f"pdj: {pdj}")
print(f"AP:  {AP}")
print(f"mAP: {mAP}")
print(f"Average time: {round(np.mean(time_landmarks), 3)}")

pck: {0.05: 0.403, 0.2: 0.56, 0.5: 0.651}
pdj: {0.05: 0.463, 0.2: 0.587, 0.5: 0.72}
AP:  {0.5: 0.598, 0.75: 0.462}
mAP: 0.443
Average time: 0.436
