In [1]:
import argparse
import math
import os

import cv2
import numpy as np
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state
from modules.pose import Pose, track_poses
from val import normalize, pad_width

In [2]:
class ImageReader(object):
    """Iterator that loads the images named in ``file_names`` one at a time.

    Each step prints the file name being read, decodes the file with
    ``cv2.imread(..., cv2.IMREAD_COLOR)`` and returns the resulting image.

    Raises:
        IOError: if an image cannot be read.
    """

    def __init__(self, file_names):
        self.file_names = file_names
        self.max_idx = len(file_names)
        # Also initialised here so that next() works before iter() was called.
        self.idx = 0

    def __iter__(self):
        # Restart from the first file every time iteration begins.
        self.idx = 0
        return self

    def __next__(self):
        if self.idx >= self.max_idx:
            raise StopIteration
        file_name = self.file_names[self.idx]
        print("ImageReader",file_name)
        img = cv2.imread(file_name,cv2.IMREAD_COLOR)
        # cv2.imread reports a missing/unreadable file by returning None instead
        # of raising, so the None check must come before touching img.size.
        if img is None or img.size == 0:
            raise IOError('Image {} cannot be read'.format(file_name))
        self.idx = self.idx + 1
        return img

In [3]:
class VideoReader(object):
    """Frame iterator backed by ``cv2.VideoCapture``.

    Starting an iteration opens the capture (stored as ``self.cap``); every
    step returns the next frame until ``read()`` reports that none is left.
    """

    def __init__(self, file_name):
        # OpenCV needs an int to read from a webcam, so a numeric value such as
        # "0" is converted; anything that is not a valid int is kept as given.
        try:
            source = int(file_name)
        except ValueError:
            source = file_name
        self.file_name = source

    def __iter__(self):
        capture = cv2.VideoCapture(self.file_name)
        self.cap = capture
        if capture.isOpened():
            return self
        raise IOError('Video {} cannot be opened'.format(self.file_name))

    def __next__(self):
        grabbed, frame = self.cap.read()
        if grabbed:
            return frame
        raise StopIteration

In [4]:
def _iter_started(frame_iter):
    """Yield the remaining items of ``frame_iter`` using ``next()`` only.

    ``VideoReader.__iter__`` opens a new ``cv2.VideoCapture`` on every call, so
    passing an already started reader to ``for``/``enumerate`` would open the
    source a second time and drop the handle that was opened first.
    """
    while True:
        try:
            frame = next(frame_iter)
        except StopIteration:
            return
        yield frame


def _focal_length_px(img_width, hFov):
    """Return ``img_width / 2 / tan(hFov / 2)`` with ``hFov`` given in degrees."""
    return img_width / 2 / math.tan(math.radians(hFov / 2))


def _draw_pose_labels(img, pose):
    """Write the pose id and its distance/angle read-outs next to its bounding box."""
    x, y = pose.bbox[0], pose.bbox[1]
    font = cv2.FONT_HERSHEY_COMPLEX
    cv2.putText(img, 'id: {}'.format(pose.id), (x, y - 16), font, 0.5, (0, 0, 255))

    # Stacked upwards from the top-left corner of the box, 16 px per row.
    above_box = (
        ('dis', pose.headdis),
        ('headpitch', pose.headpitch),
        ('headroll', pose.headroll),
        ('headyaw', pose.headyaw),
    )
    for row, (label, value) in enumerate(above_box, start=2):
        cv2.putText(img, '{}:{}'.format(label, int(value)), (x, y - 16 * row), font, 0.5, (255, 255, 0))

    # Stacked downwards, 80 px to the left of the box.
    left_of_box = (
        ('shoyaw', pose.shoyaw),
        ('shoroll', pose.shoroll),
        ('ArmLpitch', pose.ArmLpitch),
        ('ArmLyaw', pose.ArmLyaw),
        ('ArmRpitch', pose.ArmRpitch),
        ('ArmRyaw', pose.ArmRyaw),
        ('AArmB2L_L', pose.AngArmBig2Little_L),
        ('AArmB2L_R', pose.AngArmBig2Little_R),
    )
    for row, (label, value) in enumerate(left_of_box):
        cv2.putText(img, '{}:{}'.format(label, int(value)), (x - 80, y + 16 * row), font, 0.5, (255, 255, 0))


def _write_norm_points(txt_path, poses):
    """Write one comma-separated line per pose: NormPPlist[0], then x,y of every later entry."""
    with open(txt_path, "w+") as fp:
        for pose in poses:
            for i, p in enumerate(pose.NormPPlist):
                if i == 0:
                    fp.write(format(p, '.3f') + ",")
                else:
                    fp.write(format(p[0], '.3f') + "," + format(p[1], '.3f') + ",")
            fp.write("\n")


def run_demo(net, image_provider, height_size, cpu, track, smooth,videopath,hFov):
    """Run pose estimation on every frame of ``image_provider`` and show/save the result.

    For a ``VideoReader`` the annotated frames are written to
    ``<videopath without extension>_result.avi``. For an ``ImageReader`` each
    annotated image is written to ``<image path without extension>_result.png``
    together with a ``_result.txt`` holding the normalised points of every pose.
    Every detected pose is additionally compared with the reference pose stored
    under ``data/Poses/kh_0`` and the comparison result is printed.

    Keys while the window has focus: ESC stops the demo, 'p' toggles pause.

    Args:
        net: network to evaluate; moved to CUDA unless ``cpu`` is true.
        image_provider: ``VideoReader``, ``ImageReader`` or another frame iterable.
        height_size: network input height forwarded to ``infer_fast``.
        cpu: when true, inference stays on the CPU.
        track: when truthy, poses are tracked across frames and labelled.
        smooth: forwarded to ``track_poses``.
        videopath: video path (``VideoReader``) or list of image paths
            (``ImageReader``); only used to derive the output file names.
        hFov: field of view in degrees used to derive the focal length in pixels.
    """
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1
    ref_pose_name = "data/Poses/kh_0"

    is_video = isinstance(image_provider, VideoReader)
    is_images = isinstance(image_provider, ImageReader)

    # Start the iteration exactly once; for a VideoReader this opens the capture.
    frames = iter(image_provider)
    out = None
    cameraf = 0
    try:
        if is_video:
            imgw = int(frames.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            imgh = int(frames.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = frames.cap.get(cv2.CAP_PROP_FPS)
            cameraf = _focal_length_px(imgw, hFov)
            # splitext also copes with inputs without an extension, e.g. the webcam index "0".
            resultVideoPath = os.path.splitext(str(videopath))[0] + '_result.avi'
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            print(resultVideoPath,fourcc,fps,imgw,imgh)
            out = cv2.VideoWriter(resultVideoPath,fourcc,fps,(imgw,imgh))

        for imgindex, img in enumerate(_iter_started(frames)):
            orig_img = img.copy()
            if not is_video:
                # Still images may differ in size, so the focal length is per image.
                cameraf = _focal_length_px(orig_img.shape[1], hFov)

            heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

            total_keypoints_num = 0
            all_keypoints_by_type = []
            for kpt_idx in range(num_keypoints):  # 19th for bg
                total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

            pose_entries,all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
            # Map keypoints from the upsampled heatmap grid back to image pixels.
            # NOTE(review): the padding returned by infer_fast (`pad`) is not
            # subtracted here; confirm that this is intended.
            for kpt_id in range(all_keypoints.shape[0]):
                all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio) / scale
                all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio) / scale

            current_poses = []
            for entry in pose_entries:
                if len(entry) == 0:
                    continue
                pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
                for kpt_id in range(num_keypoints):
                    if entry[kpt_id] != -1.0:  # keypoint was found
                        pose_keypoints[kpt_id, 0] = int(all_keypoints[int(entry[kpt_id]), 0])
                        pose_keypoints[kpt_id, 1] = int(all_keypoints[int(entry[kpt_id]), 1])
                current_poses.append(Pose(pose_keypoints, entry[18]))

            if track:
                track_poses(previous_poses, current_poses, smooth=smooth)
                previous_poses = current_poses
            for pose in current_poses:
                pose.CalDistance(img,cameraf)
                pose.CalHeadPos(img,cameraf)
                pose.color[:]=[0,255,0]
                pose.draw(img)
                pose.GetNormPointsPos()
                # Scale the stored reference pose into this pose's bounding box and overlay it.
                strlists = GetPosesDataFromTxt(ref_pose_name,pose.bbox[0],pose.bbox[1],pose.bbox[2],pose.bbox[3])
                poseRef = Pose(strlists[0],32.0)
                poseRef.color[:]=[255,0,0]
                poseRef.draw(img)
                print(pose.CmpWithAnotherPose(poseRef))
            img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)

            for pose in current_poses:
                cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                              (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
                if track:
                    _draw_pose_labels(img, pose)

            cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
            if is_video:
                out.write(img)
            elif is_images:
                # Derive the output names from the image's own path, not from the
                # number of images that were passed in.
                result_base = os.path.splitext(videopath[imgindex])[0] + '_result'
                _write_norm_points(result_base + ".txt", current_poses)
                cv2.imwrite(result_base + '.png', img)

            key = cv2.waitKey(delay)
            if key == 27:  # esc
                print("Exc")
                return
            elif key == 112:  # 'p'
                # waitKey(0) blocks until a key is pressed, which pauses the demo.
                delay = 0 if delay == 1 else 1
    finally:
        # Only a video run owns a writer and a capture; release them on every
        # exit path (normal end, ESC, or an exception while processing).
        if out is not None:
            out.release()
        if is_video:
            image_provider.cap.release()

In [5]:
def _stage_to_map(stage_output, upsample_ratio):
    """Convert one network output to an H x W x C array upsampled by ``upsample_ratio``."""
    stage_map = np.transpose(stage_output.squeeze().cpu().detach().numpy(), (1, 2, 0))
    return cv2.resize(stage_map, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)


def infer_fast(net, img, net_input_height_size, stride, upsample_ratio, cpu,
               pad_value=(0, 0, 0), img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1/256)):
    """Run the network on a single image and return upsampled heatmaps and PAFs.

    The image is resized so that its height equals ``net_input_height_size``,
    normalised with ``img_mean``/``img_scale``, padded by ``pad_width`` and fed
    through ``net`` as a batch of one.

    Args:
        net: the network; called with a 1 x C x H x W float tensor.
        img: image array with three dimensions (height, width, channels).
        net_input_height_size: target height of the resized image.
        stride: forwarded to ``pad_width``.
        upsample_ratio: factor by which both outputs are enlarged.
        cpu: when false, the input tensor is moved to CUDA.
        pad_value, img_mean, img_scale: forwarded to ``pad_width``/``normalize``.

    Returns:
        ``(heatmaps, pafs, scale, pad)`` where ``heatmaps`` and ``pafs`` come
        from the second-to-last and last network outputs, ``scale`` is the
        resize factor that was applied and ``pad`` is the padding reported by
        ``pad_width``.
    """
    height = img.shape[0]
    scale = net_input_height_size / height

    scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    scaled_img = normalize(scaled_img, img_mean, img_scale)
    min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)]
    padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

    tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
    if not cpu:
        tensor_img = tensor_img.cuda()

    # Inference only: skip autograd bookkeeping so no graph is kept alive per frame.
    with torch.no_grad():
        stages_output = net(tensor_img)

    heatmaps = _stage_to_map(stages_output[-2], upsample_ratio)
    pafs = _stage_to_map(stages_output[-1], upsample_ratio)

    return heatmaps, pafs, scale, pad

In [12]:
def GetPosesDataFromTxt(txtPath,x,y,w,h):
    """Load normalised poses from ``txtPath + "._result.txt"`` and scale them into a box.

    Every non-blank line describes one pose as comma-separated numbers: the
    first value is the aspect ratio (not needed here), followed by x,y pairs.
    Each pair is mapped to pixel coordinates as
    ``(int(px * w + x), int(py * h + y))``.

    Args:
        txtPath: path prefix; ``"._result.txt"`` is appended to it.
        x, y: offset added to the scaled coordinates.
        w, h: factors the normalised x and y values are multiplied by.

    Returns:
        A list with one ``numpy`` array of ``[x, y]`` rows per pose line.

    Raises:
        ValueError: if a line holds a non-numeric field or an incomplete pair.
    """
    PoseDatas = []
    with open(txtPath+"._result.txt",'r') as fp:
        for line in fp:
            # Lines end with a trailing comma; drop it together with the newline.
            line = line.strip().rstrip(',')
            if not line:
                continue
            fields = line.split(',')
            coords = fields[1:]  # fields[0] is the aspect ratio
            if len(coords) % 2 != 0:
                raise ValueError('Incomplete x,y pair in pose line: {!r}'.format(line))
            # A fresh list per line keeps the poses independent of each other.
            PoseData = [
                [int(float(coords[k])*w + x), int(float(coords[k+1])*h + y)]
                for k in range(0, len(coords), 2)
            ]
            PoseDatas.append(np.array(PoseData))
    return PoseDatas

def Get2PVecsAng(a,b):
    """Return the angle in radians, in [0, pi], between the 2-D vectors a and b.

    Only the first two components of each vector are used.

    Raises:
        ZeroDivisionError: if either vector has zero length.
    """
    da = math.hypot(a[0], a[1])
    db = math.hypot(b[0], b[1])
    cos_ang = (a[0]*b[0] + a[1]*b[1])/(da*db)
    # Rounding can push the cosine of (anti)parallel vectors slightly outside
    # [-1, 1], which would make math.acos raise a domain error.
    return math.acos(max(-1.0, min(1.0, cos_ang)))



In [13]:
# Build the pose-estimation network; the trained weights are loaded in the next cell.
net = PoseEstimationWithMobileNet()

In [14]:
# Load the trained weights onto the CPU (map_location='cpu'); run_demo moves the
# network to CUDA itself when it is called with cpu=False.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load("models/checkpoint_iter_370000.pth", map_location='cpu')
load_state(net, checkpoint)

In [27]:
# Still-image run: ImageReader takes a list of image paths.
imgname = ["data/Poses/kh_1.png"]
frame_provider = ImageReader(imgname)

In [28]:
# Run the demo on the image list; cpu=False means inference runs on CUDA.
run_demo(
    net,
    frame_provider,
    height_size=256,
    cpu=False,
    track=1,
    smooth=1,
    videopath=imgname,
    hFov=60,
)

ImageReader data/Poses/kh_1.png
([18.027756377319946, 14.035668847618199, 131.30879635424276, 122.00409829181969, 178.0, 101.9803902718557, 132.3782459469833, 206.0218435020908, 85.0, 25.079872407968907, 46.2709412050371, 55.226805085936306, 24.08318915758459, 5.830951894845301, 41.0, 15.524174696260024, 68.11754546370561, 54.00925846556311], 1323.8995379688315)


In [11]:
videoname = "0"  # VideoReader converts "0" to the int 0 (webcam); alternative input: "movetest1.mp4"

In [12]:
# Video/webcam run: the capture itself is only opened once iteration starts.
frame_provider = VideoReader(videoname)

In [13]:
# Run the demo on the video source; cpu=False means inference runs on CUDA.
run_demo(
    net,
    frame_provider,
    height_size=256,
    cpu=False,
    track=1,
    smooth=1,
    videopath=videoname,
    hFov=60,
)

0_result.avi 1145656920 30.0 640 480
Exc


In [None]:
# Scratch cell: load a local test image. `img` is not used again before it is
# reassigned further down.
img=cv2.imread('1.png')

In [None]:
# Scratch: presumably the frame width/height in pixels for the angle check below
# (only `w` is used by the following cells).
w=1280;h=720

In [38]:
# Two horizontal positions: x1 lies 100 to the left of the image centre, x2 is fixed at 850.
x1 = w/2 - 100;x2 = 850

In [39]:
# Presumably the focal length in pixels (it is the divisor inside atan below) - TODO confirm.
f = 1107

In [40]:
# Angle of x1 relative to the image centre, converted to degrees.
# NOTE(review): 3.14 approximates pi; math.degrees() would be exact.
theta1 = math.atan((x1-w/2)/f)*180/3.14

In [41]:
# Angle of x2 relative to the image centre, converted to degrees (same 3.14 approximation).
theta2 = math.atan((x2-w/2)/f)*180/3.14

In [42]:
# Show both angles and the angular distance between the two positions.
print(theta2,theta1,theta2-theta1)

10.74692948640797 -5.164378589604892 15.911308076012862


In [7]:
# Scratch check of math.sqrt.
math.sqrt(100)

10.0

In [22]:
# Two test vectors mirrored about the y axis, used to try out Get2PVecsAng.
a = [-1,1.73]
b = [1,1.73]


In [11]:
# Angle between a and b in degrees (3.14 is used as an approximation of pi).
print(Get2PVecsAng(a,b)*180/3.14)

60.08926622017251


In [23]:
# Scratch check of math.acos, converted to degrees with 3.14 as pi.
# NOTE(review): math is already imported in the first cell; this import is redundant.
import math
print(math.acos(0.1)*180/3.14)

84.3035678388537


In [22]:
# Read the txt file and draw its pose on the image, assuming the detection box is known.
# Step 1: read the txt file and extract the numeric data.
name="data/Poses/kh_0"
strlists = GetPosesDataFromTxt(name,513,145,130,442)


In [23]:
# Read the image and draw the loaded pose on it to verify that the data was read correctly.
img=cv2.imread(name + '.png')
pose = Pose(strlists[0], 32.0)
pose.draw(img)
# The annotated copy is written next to the source image as <name>_testresult.png.
cv2.imwrite(name + '_testresult.png',img)

True