In [30]:
from enum import Enum, auto, IntEnum

import tensorflow as tf  # TF2
from PIL import Image
import numpy as np
import cv2


def sigmoid(x):
    """Logistic function ``1 / (1 + e^-x)``.

    Works on Python scalars and NumPy arrays alike (element-wise for
    arrays) and maps any real input into the open interval (0, 1).
    """
    decay = np.exp(-x)
    return 1.0 / (decay + 1.0)


class BodyPart(IntEnum):
    """Keypoint indices used to address entries of a per-frame keypoint array.

    Members are consecutive integers starting at 0 (NOSE) and ending at
    16 (RIGHT_ANKLE), so a member can be used directly as an array index.
    """

    # Head
    NOSE = 0
    LEFT_EYE = 1
    RIGHT_EYE = 2
    LEFT_EAR = 3
    RIGHT_EAR = 4
    # Arms
    LEFT_SHOULDER = 5
    RIGHT_SHOULDER = 6
    LEFT_ELBOW = 7
    RIGHT_ELBOW = 8
    LEFT_WRIST = 9
    RIGHT_WRIST = 10
    # Legs
    LEFT_HIP = 11
    RIGHT_HIP = 12
    LEFT_KNEE = 13
    RIGHT_KNEE = 14
    LEFT_ANKLE = 15
    RIGHT_ANKLE = 16



In [31]:
def getPoseScale(Coords, torso_multiplier=2.5):
    """Return a characteristic size of the pose, used to normalise keypoints.

    The scale is the larger of

    * the torso length (distance between the shoulder midpoint and the hip
      midpoint) multiplied by ``torso_multiplier``, and
    * the largest distance from the hip midpoint to any keypoint.

    Distances are plain Euclidean norms, so the result scales linearly with
    the pose: ``getPoseScale(k * Coords) == k * getPoseScale(Coords)`` for
    ``k > 0``. (Taking an extra square root of the norm, as an earlier
    version did, breaks that property and makes the normalised coordinates
    depend on how large the person appears in the frame.)

    Parameters
    ----------
    Coords : array-like, shape (num_keypoints, D)
        Keypoint coordinates, indexable by ``BodyPart`` members.
    torso_multiplier : float, optional
        Factor applied to the torso length before it is compared with the
        maximum hip-to-keypoint distance. Defaults to 2.5.

    Returns
    -------
    float
        The pose scale (0 when all keypoints coincide).
    """
    Coords = np.asarray(Coords, dtype=float)

    shoulder_middle = (Coords[BodyPart.RIGHT_SHOULDER] + Coords[BodyPart.LEFT_SHOULDER]) / 2
    hip_middle = (Coords[BodyPart.RIGHT_HIP] + Coords[BodyPart.LEFT_HIP]) / 2

    torso_size = np.linalg.norm(shoulder_middle - hip_middle)
    # One distance per keypoint, however many keypoints the array holds.
    max_dist = np.linalg.norm(Coords - hip_middle, axis=1).max()

    return max(torso_size * torso_multiplier, max_dist)

In [32]:
# Load the TFLite model and reserve its input/output tensor buffers.
interpreter = tf.lite.Interpreter(model_path="posenet_model.tflite", num_threads=4)
interpreter.allocate_tensors()

# Tensor metadata (index, shape, dtype, ...) for feeding and reading the model.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# A float32 input tensor means frames must be rescaled before inference.
floating_model = input_details[0]['dtype'] == np.float32

# Frame size the model is fed with, read from axes 1 and 2 of the input shape.
input_shape = input_details[0]['shape']
img_height, img_width = input_shape[1], input_shape[2]

In [33]:
video_file = "legwork-up.MOV"  # path of the video file to process

cap = cv2.VideoCapture(video_file)  # create the video capture object

# One (numKeypoints, 2) array of recentred, normalised [x, y] coordinates
# per successfully decoded frame.
result_data = list()

try:
    if cap.isOpened():  # the capture object was initialised successfully
        while True:
            ret, img = cap.read()  # read the next frame
            if not ret:  # end of stream or read failure
                break

            img = cv2.resize(img, dsize=(img_width, img_height))
            # OpenCV decodes frames in BGR channel order; convert to RGB,
            # the order used by the PoseNet TFLite reference pipelines.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            input_data = np.expand_dims(img, axis=0)  # add the batch axis
            if floating_model:
                # Map 0..255 pixel values to -1..1 for float models.
                input_data = (np.float32(input_data) - 127.5) / 127.5

            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()
            heatmaps = interpreter.get_tensor(output_details[0]['index'])
            offsets = interpreter.get_tensor(output_details[1]['index'])

            height, width, numKeypoints = heatmaps.shape[1:4]

            # Heatmap cell with the highest score for every keypoint.
            # argmax over the row-major flattened grid returns the first
            # maximum, the same cell a strict `>` row/column scan selects.
            flat_best = heatmaps[0].reshape(height * width, numKeypoints).argmax(axis=0)
            maxRows, maxCols = np.unravel_index(flat_best, (height, width))
            keypointIdx = np.arange(numKeypoints)

            # Coarse grid position scaled to image size, refined by the offset
            # stored for that cell (y offsets first, x offsets in the second
            # half of the last axis).
            yCoords = maxRows / (height - 1) * img_height + \
                offsets[0, maxRows, maxCols, keypointIdx]
            xCoords = maxCols / (width - 1) * img_width + \
                offsets[0, maxRows, maxCols, keypointIdx + numKeypoints]
            Coords = np.stack([xCoords, yCoords], axis=1)

            # Per-keypoint confidence; kept for inspection, not saved.
            confidenceScores = sigmoid(heatmaps[0, maxRows, maxCols, keypointIdx])

            # Recentre on the midpoint between the hips.
            center_of_mass = (Coords[BodyPart.LEFT_HIP] + Coords[BodyPart.RIGHT_HIP]) / 2
            Coords = Coords - center_of_mass

            # Normalise by the pose scale.
            norm_factor = getPoseScale(Coords)
            Coords = Coords / norm_factor
            result_data.append(Coords)
    else:
        print("can't open video.")  # the capture object failed to initialise
finally:
    # Give the capture back even if decoding or inference raised.
    cap.release()
    cv2.destroyAllWindows()


In [34]:
# Stack the per-frame keypoint arrays into one array and write it next to the
# video, using the video file name with ".npy" appended.
result_data = np.array(result_data)
np.save(f"{video_file}.npy", result_data)
