# 姿勢解析プログラム

### 必要なライブラリのインポート

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import pandas as pd
import cv2

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# Some modules to display an animation using imageio
import imageio
from IPython.display import HTML, display

import math

from PIL import Image
from os.path import dirname,basename

import moviepy.editor as mp

2024-02-01 00:39:22.915600: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-01 00:39:23.079835: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-01 00:39:23.642102: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-01 00:39:23.642306: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-01 00:39:23.771451: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

### Helper functions for visualization from Movenet tutorial

https://www.tensorflow.org/hub/tutorials/movenet?hl=ja#helper_functions_for_visualization

In [3]:
# Dictionary that maps from joint names to keypoint indices.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Maps bones to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the coordinates of all keypoints of all detected entities;
      * the coordinates of all skeleton edges of all detected entities;
      * the colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  num_instances, _, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    keypoints_xy = np.zeros((0, 17, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, output_image_height=None, keypoint_threshold=0.11):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  # modified 
  # fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  fig, ax = plt.subplots(figsize=(width / 100, height / 100))

  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width, keypoint_threshold)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
#  image_from_plot = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def to_gif(images, duration):
  """Converts image sequence (4D numpy array) to gif."""
  imageio.mimsave('./tmp/animation.gif', images, duration=duration)
  return embed.embed_file('./tmp/animation.gif')

def progress(value, max=100):
  return HTML("""
      <progress
          value='{value}'
          max='{max}',
          style='width: 100%'
      >
          {value}
      </progress>
  """.format(value=value, max=max))


### Tensorflowのモデル読み込み+推論関数定義

In [4]:
# movenet_lightning
module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
input_size = 192

# movenet_thunder
# module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
# input_size = 256

def movenet(input_image):
    model = module.signatures['serving_default']
    # SavedMode format expects tensor type of int32
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run mode inference
    outputs = model(input_image)
    # Out put is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores



## 正方形座標系のkyepoints_with_scoresをもとの画像の縦横幅にリサイズする関数の定義

In [5]:
def resize_keypoints_with_scores(keypoints_with_scores, height, width):
    # 縦が長いか横が長いか判定
    if height > width:
        # 横幅を調整 
        xy = 1
        long_side = height
        dif = height - width
    else:
        # 高さを調整
        xy = 0
        long_side = width
        dif = width - height

    # 長辺の0-1.0の座標系を-0.5～0.5の範囲の座標系に変換
    tmp1 = keypoints_with_scores[0,0,:,[xy]] -0.5
    # 長辺と短辺の割合から、短編側を拡大　=> パディングした領域ははみ出るが、もともとの画像領域は-0.5～0.5の範囲に収まる（はず）
    tmp2 = tmp1 * ( long_side / (long_side -dif) )
    # 0-1.0の座標系に戻す
    tmp3 = tmp2 + 0.5
    # ndarrayをコピー
    tmp4 = np.copy(keypoints_with_scores)
    # 前に拡大した値をもとのx or yの列に代入
    tmp4[0,0,:,[xy]] = tmp3

    # デバッグ向けの出力
    # print(f'keypoints_with_scores[0,0,:,[xy]]\n {keypoints_with_scores[0,0,:,[xy]]}')
    # print(f'tmp1\n{tmp1}')
    # print(f'tmp2\n{tmp2}')
    # print(f'tmp3\n{tmp3}')
    # print(f'tmp4[0,0,:,[xy]]:\n{tmp4[0,0,:,[xy]]}')
        
    resized_keypoints_with_scores = tmp4
    return resized_keypoints_with_scores


# 直接MP4を読み込んで処理をする方式に変更

### 画像の推論部分だけ切り出して関数化

In [6]:
def pred3(frame, keypoint_threshold=0.11):

    current_frame = tf.expand_dims(frame, axis=0)
    _, h, w, _ = current_frame.shape
    # 推論向けにinput_sizeで正方形にリサイズ
    resized_current_frame = tf.image.resize_with_pad(current_frame, input_size, input_size)
    
    # 推論の実行
    keypoints_with_scores = movenet(resized_current_frame)
    
    # 推論結果を重ねるためにkeypoints_with_scoresをリサイズ
    keypoints_with_scores = resize_keypoints_with_scores(keypoints_with_scores, h, w)
    output_overlay = draw_prediction_on_image(np.squeeze(current_frame.numpy(), axis=0), keypoints_with_scores, keypoint_threshold=keypoint_threshold)
    
    return output_overlay, keypoints_with_scores



In [10]:
def predict_mp4(input_path, output_path):
    results = []
    ret = 1
    try:
        cap = cv2.VideoCapture(input_path)
        
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
        i = 0
        while True:
            ret, frame = cap.read()
            if ret:
                print(f'processing the {i}nd frame')
                #print(frame.shape)
                #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame, keypoints_with_scores = pred3(frame, keypoint_threshold = 0.2)
                out.write(frame)
                results.append(keypoints_with_scores)
                #print(frame.shape)
                i = i +1
                #if i > 60:
                #    break        
            else:        
                break

        ret=0
    finally:
        cap.release()
        out.release()
    np.save(output_path, results)
    return ret, results

In [12]:
%%time
input_path = 'images/PXL_20240121_053909046.TS.mp4'
output_path = input_path + '.pred.mp4'

ret,results = predict_mp4_2(input_path, output_path)
print(ret)

processing the 0nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 1nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 2nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 3nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 4nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 5nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 6nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 7nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 8nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 9nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 10nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 11nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 12nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 13nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 14nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 15nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 16nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 17nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 18nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 19nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 20nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 21nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 22nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 23nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 24nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 25nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 26nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 27nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 28nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 29nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 30nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 31nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 32nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 33nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 34nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 35nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 36nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 37nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 38nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 39nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 40nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 41nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 42nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 43nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 44nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 45nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 46nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 47nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 48nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 49nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 50nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 51nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 52nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 53nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 54nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 55nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 56nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 57nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 58nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 59nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 60nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 61nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 62nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 63nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 64nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 65nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 66nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 67nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 68nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 69nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 70nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 71nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 72nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 73nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 74nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 75nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 76nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 77nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 78nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 79nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 80nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 81nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 82nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 83nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 84nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 85nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 86nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 87nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 88nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 89nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 90nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 91nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 92nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 93nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 94nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 95nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 96nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 97nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 98nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 99nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 100nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 101nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 102nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 103nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 104nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 105nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 106nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 107nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 108nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 109nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 110nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 111nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 112nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 113nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 114nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 115nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 116nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 117nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 118nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 119nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 120nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 121nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 122nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 123nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 124nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 125nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 126nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 127nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 128nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 129nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 130nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 131nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 132nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 133nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 134nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 135nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 136nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 137nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 138nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 139nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 140nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 141nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 142nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 143nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 144nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 145nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 146nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 147nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 148nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 149nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 150nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 151nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 152nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 153nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 154nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 155nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 156nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 157nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 158nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 159nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 160nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 161nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 162nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 163nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 164nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 165nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 166nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 167nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 168nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 169nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 170nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 171nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 172nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 173nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 174nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 175nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 176nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 177nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 178nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 179nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 180nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 181nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 182nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 183nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 184nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 185nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 186nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 187nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 188nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 189nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 190nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 191nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 192nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



processing the 193nd frame


  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)



0
CPU times: user 1min 53s, sys: 1min 10s, total: 3min 4s
Wall time: 1min 47s


array([0.98089576, 0.4254498 , 0.136844  ], dtype=float32)

array([[[[0.3930312 , 0.48716873, 0.4296731 ],
         [0.38905072, 0.48105145, 0.40783602],
         [0.39015293, 0.48988336, 0.3618165 ],
         [0.38876414, 0.47394958, 0.49268872],
         [0.39067885, 0.4974077 , 0.43995893],
         [0.40097153, 0.46398586, 0.45412683],
         [0.40220374, 0.51248384, 0.42228258],
         [0.4127192 , 0.46374676, 0.36699656],
         [0.41226447, 0.5238168 , 0.41570747],
         [0.4360873 , 0.4696485 , 0.33688504],
         [0.43227637, 0.49106672, 0.26884463],
         [0.44265097, 0.47119194, 0.3719524 ],
         [0.4418808 , 0.51321656, 0.3855683 ],
         [0.48245418, 0.4481749 , 0.2200527 ],
         [0.49631867, 0.4921131 , 0.29614735],
         [0.5097182 , 0.45038828, 0.23717554],
         [0.5143422 , 0.4971372 , 0.28927422]]]], dtype=float32)

# 以下、試行錯誤の記録...

### mp4->animationGIF変換関数の定義

In [10]:
def mp4_to_frames(path, max_frame_per_sec=-1, do_resize=False, max_long_side=1000):
    cap = cv2.VideoCapture(path)
    
    if not cap.isOpened():
        return None,None
    
    # fpsを取得
    fps = cap.get(cv2.CAP_PROP_FPS)

    if max_frame_per_sec == -1:
        # 全フレームを対象とする
        per_frame = -1
        # フレームの間隔（ミリ秒）を計算
        dur = int(1000 / fps)
    else:
        # 何枚おきにフレームを取得するか計算
        per_frame = math.ceil(fps / max_frame_per_sec)
        # フレームの間隔（ミリ秒）を計算
        dur = int(1000 / fps * per_frame)
        
    print(f'per_frame:{per_frame}')
    
    print(f'dur:{dur}')
    
    frames = []
    i = 0
    while True:
        ret, frame = cap.read()
        if ret:
            # フレームの間引き
            if (i % per_frame) != 0:
                i += 1
                continue
            
            # BGRをRGBに変換
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pillow_image = Image.fromarray(rgb_frame)
            
            if do_resize:
                w, h = cal_resided_w_h(pillow_image.width,pillow_image.height,max_long_side)
                pillow_image = pillow_image.resize((w, h))
            frames.append(pillow_image)
            i += 1
        else:
            return frames,dur


def cal_resided_w_h(w,h,max_long_side):
    if w > max_long_side or h > max_long_side:
        if w > h:
            ret_w = max_long_side
            ret_h = int(h * max_long_side / w)
        else:
            ret_h = max_long_side
            ret_w = int(w * max_long_side / h)
    else:
        ret_w, ret_h = w,h
    return ret_w, ret_h

def save_gif(frames, save_path, dur):
    # see also: https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#gif
    frames[0].save(save_path, format='GIF', save_all=True, append_images=frames[1:],duration=dur, loop=0 )

def mp4_to_gif(mp4_path, out_path, max_frame_per_sec=-1, do_resize=False, max_long_side=1000):
    frames, dur = mp4_to_frames(mp4_path, max_frame_per_sec, do_resize, max_long_side)
    save_gif(frames, out_path, dur)



### GIF->MP4変換関数の定義

In [5]:
def gif2mp4(input_gif, output_mp4):
    with mp.VideoFileClip(input_gif) as movie_file:
        movie_file.write_videofile(output_mp4)
        movie_file.close()

def mp42gif(input, output):
    with mp.VideoFileClip(input) as movie_file:
        movie_file.write_videofile(output,codec='gif')
        movie_file.close()


# アニメーションGIFの推論実行関数の定義

In [12]:
def pred(image_path):

    #image_path = 'images/dance_input.gif'
    #image_path ='images/pexels-photo-4384679.jpeg'
    images = tf.io.read_file(image_path)
    images = tf.image.decode_gif(images)
    
    # フレーム間隔を取得
    duration = Image.open(image_path).info['duration']
    cnt = images.shape[0]#[0,:,:,:]
    results = []
    result_frames = []
    for idx in range(cnt):
        # 推論向け画像変換
        current_frame = tf.expand_dims(images[idx], axis=0)
        # フレームサイズを取得
        _, h, w, _ = current_frame.shape

        # 推論向けに正方形にリサイズ
        resized_current_frame = tf.image.resize_with_pad(current_frame, input_size, input_size)
        # 推論の実行
        keypoints_with_scores = movenet(resized_current_frame)

        # 推論結果のスコアを保存
        results.append(keypoints_with_scores)
        
        # 推論結果を重ねるために長辺をベースに正方形にリサイズしてパディング
        resize_size = h if h > w else w
        display_image = tf.cast(tf.image.resize_with_pad(current_frame, resize_size, resize_size), dtype=tf.int32)
        output_overlay = draw_prediction_on_image(np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)
    
        result_frames.append(output_overlay)
        
    output = np.stack(result_frames, axis=0)
    to_gif(output, duration=duration)
    # save_gif(output, '/tmp/animation.gif', duration)

    return output, results



In [13]:
def pred2(image_path):

    #image_path = 'images/dance_input.gif'
    #image_path ='images/pexels-photo-4384679.jpeg'
    images = tf.io.read_file(image_path)
    images = tf.image.decode_gif(images)
    
    # フレーム間隔を取得
    duration = Image.open(image_path).info['duration']
    cnt = images.shape[0]#[0,:,:,:]
    results = []
    result_frames = []
    for idx in range(cnt):
        # 推論向け画像変換
        current_frame = tf.expand_dims(images[idx], axis=0)
        # フレームサイズを取得
        _, h, w, _ = current_frame.shape

        # 推論向けに正方形にリサイズ
        resized_current_frame = tf.image.resize_with_pad(current_frame, input_size, input_size)
        # 推論の実行
        keypoints_with_scores = movenet(resized_current_frame)

        # 推論結果のスコアを保存
        results.append(keypoints_with_scores)
        
        # 推論結果を重ねるためにkeypoints_with_scoresをリサイズ
        keypoints_with_scores = resize_keypoints_with_scores(keypoints_with_scores, h, w)
        output_overlay = draw_prediction_on_image(np.squeeze(current_frame.numpy(), axis=0), keypoints_with_scores)

        result_frames.append(output_overlay)
        
    output = np.stack(result_frames, axis=0)
    to_gif(output, duration=duration)
    # save_gif(output, '/tmp/animation.gif', duration)

    return output, results



# 入力のMP4をアニメーションGifに変換（リサイズ含む）

In [14]:
mp4_path = 'images/PXL_20240121_053909046.TS.mp4'
gif_path = dirname(mp4_path) + '/' + basename(mp4_path).split('.')[0]+'.gif'

In [15]:
%%time
mp4_to_gif(mp4_path, gif_path,do_resize=False)

per_frame:-1
dur:33
CPU times: user 1min 54s, sys: 4.19 s, total: 1min 58s
Wall time: 1min 51s


In [29]:
%%time
mp4_path = 'images/PXL_20240121_053909046.TS.mp4'
gif_path = dirname(mp4_path) + '/' + basename(mp4_path).split('.')[0]+'.gif'
mp42gif(mp4_path, gif_path)

Moviepy - Building video images/PXL_20240121_053909046.gif.
MoviePy - Writing audio in PXL_20240121_053909046TEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video images/PXL_20240121_053909046.gif



                                                              

Moviepy - Done !
Moviepy - video ready images/PXL_20240121_053909046.gif
CPU times: user 620 ms, sys: 1.54 s, total: 2.16 s
Wall time: 16.1 s


# アニメーションGIFを推論し、姿勢情報を取得

In [None]:
%%time
output, keypoints_with_scores = pred2(gif_path)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dty

In [23]:
out_path = gif_path + '.mp4'
gif2mp4('tmp/animation.gif', out_path)

Moviepy - Building video images/PXL_20240121_053909046.gif.mp4.
Moviepy - Writing video images/PXL_20240121_053909046.gif.mp4



                                                            

Moviepy - Done !
Moviepy - video ready images/PXL_20240121_053909046.gif.mp4


# 以下、解析用

In [None]:
def draw_prediction_on_image2(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary that defines the coordinates of the bounding box
      of the crop region in normalized coordinates (see the init_crop_region
      function below for more detail). If provided, this function will also
      draw the bounding box on the image.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
#  image_from_plot = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot