# 3-4. Pose Estimation

In [None]:
!pip3 install --upgrade mxnet-cu101 > /dev/null
!pip3 install --upgrade gluoncv > /dev/null

In [None]:
!pip3 install --upgrade youtube-dl

In [None]:
def get_youtube_video(youtube_url):
  """Download a YouTube video with youtube-dl and return its local file name.

  Args:
    youtube_url: URL handed to ``YoutubeDL.extract_info``.

  Returns:
    The file name that ``YoutubeDL.prepare_filename`` reports for the
    downloaded video.
  """
  from youtube_dl import YoutubeDL

  # Format selector: best available MP4 whose height is at most 480.
  options = {'format': 'best[height<=480][ext=mp4]'}
  with YoutubeDL(options) as downloader:
    video_info = downloader.extract_info(youtube_url, download=True)
    return downloader.prepare_filename(video_info)

## 댄스 영상에서 pose 추출

In [None]:
# Download the dance video; `kkang` holds the file name returned by get_youtube_video.
kkang = get_youtube_video('https://www.youtube.com/watch?v=ZdrUWlG2ZQE')
print(kkang)

### 영상 이미지로 저장

In [None]:
def read_video(filename, output_path, start_ms = 0, end_ms = None, step=1):
  """Save frames of a video file as numbered JPEG images.

  Reading starts at ``start_ms`` and stops when the stream ends or when the
  position reported after a read reaches ``end_ms``. Each image is written
  as ``%04d.jpg`` using the value of ``CAP_PROP_POS_FRAMES`` read right
  after the frame was decoded.

  Args:
    filename: Path of the video file to open.
    output_path: Directory for the JPEG files (created if missing).
    start_ms: Position in milliseconds to seek to before reading.
    end_ms: Stop once the reported position is >= this value; ``None``
      reads to the end of the stream.
    step: Save every ``step``-th frame (1 saves every frame).

  Returns:
    Tuple ``(width, height, fps, frame_count)`` as reported by the capture.
  """
  from tqdm.notebook import tqdm
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  cap = cv2.VideoCapture(filename)
  try:
    cap.set(cv2.CAP_PROP_POS_MSEC, start_ms)

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Duration is frames divided by frames-per-second; a capture that
    # reports fps == 0 (e.g. it failed to open) is shown as 0 seconds.
    total_ms = frame_count / fps * 1000 if fps > 0 else 0
    print('Frame width:', width)
    print('Frame height:', height)
    print('Frame count:', frame_count)
    print('FPS:', fps)
    print('total sec:', int(total_ms/1000))

    capture_count = 0

    # Progress-bar total: expected frames in the requested time window,
    # or the whole stream when no end is given.
    if end_ms is not None:
      time_gap_sec = (end_ms - start_ms)/1000
      pbar_count = round(time_gap_sec*fps)
    else:
      pbar_count = round(frame_count)

    with tqdm(total=pbar_count) as pbar:
      while cap.isOpened():
        ret, img = cap.read()
        if not ret:
          break
        # Both positions are queried after the read.
        frame_no = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        frame_msec = int(cap.get(cv2.CAP_PROP_POS_MSEC))

        if end_ms is not None and frame_msec >= end_ms:
          break

        cv2.imwrite(os.path.join(output_path, '%04d.jpg'%(frame_no)), img)
        capture_count += 1
        pbar.update(step)
        if step > 1:
          # Jump ahead so that only every step-th frame is decoded and saved.
          cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no + (step-1))
  finally:
    # Release the capture on every exit path (end of stream, end_ms, error).
    cap.release()

  print('capture_count:', capture_count)

  return width, height, fps, frame_count

In [None]:
# !rm -rf kkang*

In [None]:
# Write the frames of the downloaded dance video into ./kkang and keep the reported video properties.
width, height, fps, frame_count = read_video(kkang, './kkang')

### 압축(다운로드 및 확인용도)

In [None]:
def compress_folder(input_path):
  """Pack every file under ``input_path`` into ``<input_path>.zip``.

  Files are stored under their path relative to ``input_path``, so files
  with the same name in different subfolders do not collide. For a flat
  folder the archive member names are just the file names.

  Args:
    input_path: Folder to archive. A trailing path separator is ignored so
      the archive is created next to the folder, not inside it.
  """
  from zipfile import ZipFile
  import os
  from tqdm.notebook import tqdm

  # Strip trailing separators; otherwise 'dir/' would produce 'dir/.zip',
  # which lives inside the folder being walked.
  separators = os.sep + (os.altsep or '')
  root = input_path.rstrip(separators) or input_path

  with ZipFile('{}.zip'.format(root), 'w') as zipObj:
    for folderName, subfolders, filenames in os.walk(root):
      for filename in tqdm(filenames):
        filePath = os.path.join(folderName, filename)
        # Member name keeps the sub-directory part relative to the root.
        zipObj.write(filePath, os.path.relpath(filePath, root))

In [None]:
# Archive the extracted frames as ./kkang.zip for download and inspection.
compress_folder('./kkang')

### 영상 구간 이미지로 저장



In [None]:
def read_video_by_frame(filename, output_path, start_frame = 1, end_frame = None, step=1):
  """Save an inclusive, 1-based frame range of a video as JPEG images.

  Files are named ``%04d.jpg`` after the value of ``CAP_PROP_POS_FRAMES``
  read right after each frame is decoded, i.e. the 1-based number of that
  frame. The capture is therefore positioned at index ``start_frame - 1``
  so that the first saved file is ``start_frame`` itself.

  Args:
    filename: Path of the video file to open.
    output_path: Directory for the JPEG files (created if missing).
    start_frame: 1-based number of the first frame to save.
    end_frame: 1-based number of the last frame to save; ``None`` reads to
      the end of the stream.
    step: Save every ``step``-th frame (1 saves every frame).

  Returns:
    Tuple ``(width, height, fps, frame_count)`` as reported by the capture.
  """
  from tqdm.notebook import tqdm
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  cap = cv2.VideoCapture(filename)
  try:
    # CAP_PROP_POS_FRAMES is the 0-based index of the next frame to decode,
    # while file names use the position after the read, hence the -1.
    first_index = max(start_frame - 1, 0)
    cap.set(cv2.CAP_PROP_POS_FRAMES, first_index)

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('Frame width:', width)
    print('Frame height:', height)
    print('Frame count:', frame_count)
    print('FPS:', fps)

    capture_count = 0

    # Progress-bar total: number of frames in the inclusive range.
    if end_frame is not None:
      pbar_count = max(end_frame - first_index, 0)
    else:
      pbar_count = max(frame_count - first_index, 0)

    with tqdm(total=pbar_count) as pbar:
      while cap.isOpened():
        ret, img = cap.read()
        if not ret:
          break
        frame_no = int(cap.get(cv2.CAP_PROP_POS_FRAMES))

        if end_frame is not None and frame_no > end_frame:
          break

        cv2.imwrite(os.path.join(output_path, '%04d.jpg'%(frame_no)), img)
        capture_count += 1
        pbar.update(step)
        if step > 1:
          # Jump ahead so that only every step-th frame is decoded and saved.
          cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no + (step-1))
  finally:
    # Release the capture on every exit path (end of stream, end_frame, error).
    cap.release()

  print('capture_count:', capture_count)

  return width, height, fps, frame_count

### 구간별 이미지 생성

In [None]:
# Segment 1 of the dance video: start_frame=1, end_frame=1200, step=1, written to ./kkang_1.
width, height, fps, frame_count = read_video_by_frame(kkang, './kkang_1', 1, 1200, 1)

In [None]:
# Segment 2 of the dance video: start_frame=1201, end_frame=2400, step=1, written to ./kkang_2.
width, height, fps, frame_count = read_video_by_frame(kkang, './kkang_2', 1201, 2400, 1)

In [None]:
# Segment 3 of the dance video: start_frame=2401, end_frame=3600, step=1, written to ./kkang_3.
width, height, fps, frame_count = read_video_by_frame(kkang, './kkang_3', 2401, 3600, 1)

In [None]:
# Segment 4 of the dance video: start_frame=3601, end_frame=4601, step=1, written to ./kkang_4.
width, height, fps, frame_count = read_video_by_frame(kkang, './kkang_4', 3601, 4601, 1)

### image -> video

In [None]:
def make_video_file(input_path, output_path, output_filename, width, height, fps, step=1):
  """Encode the images of a folder, in sorted name order, into a video file.

  Directory entries that ``cv2.imread`` cannot decode (sub-directories such
  as ``.ipynb_checkpoints``, non-image files) are skipped and counted
  instead of being passed to the writer.

  Args:
    input_path: Folder containing the source images.
    output_path: Folder for the video (created if missing).
    output_filename: File name of the video inside ``output_path``.
    width: Frame width given to ``cv2.VideoWriter``.
    height: Frame height given to ``cv2.VideoWriter``.
    fps: Frame rate of the source images; the writer uses ``fps / step``.
    step: Frame step the images were sampled with.
  """
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  from tqdm.notebook import tqdm
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  # Sorted by name so zero-padded frame files are written in frame order.
  file_list = sorted(os.listdir(input_path))

  # Output video codec.
  fourcc = cv2.VideoWriter_fourcc(*'FMP4')

  out = cv2.VideoWriter('{}/{}'.format(output_path, output_filename), fourcc, fps/step, (width, height))
  skipped = 0
  try:
    for filename in tqdm(file_list):
      im_file = '{}/{}'.format(input_path,filename)
      img = cv2.imread(im_file)
      if img is None:
        # Not a readable image; leave it out of the video.
        skipped += 1
        continue
      out.write(img)
  finally:
    # Always finalize the file, even if reading or writing fails midway.
    out.release()

  if skipped:
    print('skipped unreadable entries:', skipped)

In [None]:
# Re-encode each extracted segment folder as a video inside kkang_split/.
# NOTE(review): 640x360 at 25.0 fps is hard-coded here; presumably it matches the
# width/height/fps returned by read_video_by_frame for this video -- confirm.
make_video_file('kkang_1', 'kkang_split', 'kkang_1.mp4', 640, 360, 25.0, 1)
make_video_file('kkang_2', 'kkang_split', 'kkang_2.mp4', 640, 360, 25.0, 1)
make_video_file('kkang_3', 'kkang_split', 'kkang_3.mp4', 640, 360, 25.0, 1)
make_video_file('kkang_4', 'kkang_split', 'kkang_4.mp4', 640, 360, 25.0, 1)

In [None]:
import time
from gluoncv import model_zoo, data, utils

In [None]:
from matplotlib import pyplot as plt
from gluoncv import model_zoo, data, utils
from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord, detector_to_alpha_pose

In [None]:
# Load the pretrained 'yolo3_mobilenet1.0_coco' detector and the
# 'simple_pose_resnet18_v1b' pose network from the GluonCV model zoo.
detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True) 
pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True) 
# Alternative pose network, currently disabled:
# pose_net = model_zoo.get_model('alpha_pose_resnet101_v1b_coco', pretrained=True) 

# Note that we can reset the classes of the detector to only include
# human, so that the NMS process is faster.

detector.reset_class(["person"], reuse_weights=['person'])

In [None]:
def display_detection(detector, pose_net, im_fname):
  """Detect people in an image file and draw their estimated keypoints.

  Args:
    detector: Detection network; called on the preprocessed image and
      expected to return ``(class_IDs, scores, bounding_boxs)``.
    pose_net: Pose network; called on the cropped person inputs and
      expected to return heatmaps.
    im_fname: Path of the image file to process.

  Returns:
    The image with boxes and keypoints drawn by ``cv_plot_keypoints``, or
    the loaded image unchanged when no person box is available for pose
    estimation.
  """
  # Imported here so the function does not depend on notebook-level imports.
  from gluoncv import data, utils
  from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord

  # Image preprocessing with the YOLO test preset.
  x, img = data.transforms.presets.yolo.load_test(im_fname)

  # Inference.
  class_IDs, scores, bounding_boxs = detector(x)

  # Turn the detector output into cropped inputs for the keypoint network.
  pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)

  # With no usable person box there is nothing to feed the pose network;
  # return the plain image instead of raising so the caller keeps the frame.
  if pose_input is None or len(upscale_bbox) == 0:
    return img

  # Predict with the pose network and map heatmaps back to image coordinates.
  predicted_heatmap = pose_net(pose_input)
  pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

  # Draw the result.
  img = utils.viz.cv_plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores, box_thresh=0.5, keypoint_thresh=0.2)

  return img

In [None]:
# !rm -rf kkang_pose

In [None]:
# Directory that receives the pose-annotated videos.
output_path = './kkang_pose'
import os 
# exist_ok avoids the separate check-then-create step.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Obtain cv2 through GluonCV's import helper and build the FourCC code
# that is passed to cv2.VideoWriter further down.
from gluoncv.utils.filesystem import try_import_cv2
cv2 = try_import_cv2()
fourcc = cv2.VideoWriter_fourcc(*'FMP4')

In [None]:
# Frame image names of the first segment, in sorted (frame) order.
file_list = sorted(os.listdir('kkang_1'))

In [None]:
# Output frame size and frame rate used for the dance-video pose clips.
width, height, fps = 640, 360, 25.0

In [None]:
from tqdm.notebook import tqdm

# Run pose estimation on every frame of segment 1 and encode the result.
out = cv2.VideoWriter('{}/kkang_1_pose.mp4'.format(output_path), fourcc, fps, (width, height))

try:
  for filename in tqdm(file_list):
    filename = '{}/{}'.format('kkang_1',filename)
    try:
      img = display_detection(detector, pose_net, filename)
      frame = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_AREA)
      # Swap the R and B channels before handing the frame to the writer.
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      out.write(frame) 
    except Exception as ex:
      # Keep going, but report the frame that is missing from the video.
      print('skipped {}: {!r}'.format(filename, ex))
finally:
  # Finalize the file even if the loop is interrupted.
  out.release()

In [None]:
def get_pose_video(detector, pose_net, image_path, output_path, output_filename, width, height, fps, step=3):
  """Run pose estimation on every ``step``-th image of a folder and encode a video.

  Images are taken in sorted name order. Each selected image goes through
  ``display_detection``, is resized to ``(width, height)`` and written to
  the output video, whose frame rate is ``fps / step``. A frame that fails
  is reported and left out; processing continues with the next one.

  Args:
    detector: Detection network forwarded to ``display_detection``.
    pose_net: Pose network forwarded to ``display_detection``.
    image_path: Folder containing the frame images.
    output_path: Folder for the video (created if missing).
    output_filename: File name of the video inside ``output_path``.
    width: Output frame width.
    height: Output frame height.
    fps: Frame rate of the source images.
    step: Use one image out of every ``step``.
  """
  from tqdm.notebook import tqdm
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  fourcc = cv2.VideoWriter_fourcc(*'FMP4')
  file_list = sorted(os.listdir(image_path))

  out = cv2.VideoWriter('{}/{}'.format(output_path, output_filename), fourcc, fps/step, (width, height))
  failed = 0
  try:
    for index, filename in enumerate(tqdm(file_list)):
      # Skip images that are not on the step grid.
      if (index % step) != 0:
        continue
      im_file = '{}/{}'.format(image_path,filename)
      try:
        img = display_detection(detector, pose_net, im_file)
        frame = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_AREA)
        # Swap the R and B channels before handing the frame to the writer.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        out.write(frame)
      except Exception as ex:
        # Best effort: report the frame and keep going.
        failed += 1
        print('skipped {}: {!r}'.format(im_file, ex))
  finally:
    # Finalize the file even if the loop is interrupted.
    out.release()

  if failed:
    print('frames skipped due to errors:', failed)

### 박스 걷어내기 


gluoncv util 코드를 수정한 후 런타임을 다시 시작한다.

gluoncv의 keypoints.py 파일에 있는 cv_plot_keypoints 함수에서  
cv_plot_bbox 함수를 호출하는 부분(130~131번째 줄)을 주석 처리한다.

In [None]:
# img = utils.viz.cv_plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores, box_thresh=0.5, keypoint_thresh=0.2)

In [None]:
# Output frame size and frame rate for the dance-video pose clips below.
width, height, fps = 640, 360, 25.0

### pose estimation

In [None]:
# Pose video for segment 1, using every 3rd frame image; written to kkang_pose/kkang_1_pose.mp4.
get_pose_video(detector, pose_net, 'kkang_1', 'kkang_pose', 'kkang_1_pose.mp4', width, height,fps, 3)

In [None]:
# Pose video for segment 2, using every 3rd frame image; written to kkang_pose/kkang_2_pose.mp4.
get_pose_video(detector, pose_net, 'kkang_2', 'kkang_pose', 'kkang_2_pose.mp4', width, height,fps, 3)

In [None]:
# Pose video for segment 3, using every 3rd frame image; written to kkang_pose/kkang_3_pose.mp4.
get_pose_video(detector, pose_net, 'kkang_3', 'kkang_pose', 'kkang_3_pose.mp4', width, height,fps, 3)

In [None]:
# Pose video for segment 4, using every 3rd frame image; written to kkang_pose/kkang_4_pose.mp4.
get_pose_video(detector, pose_net, 'kkang_4', 'kkang_pose', 'kkang_4_pose.mp4', width, height,fps, 3)

## golf swing 에서 pose 추출

#### 1. 영상 다운로드

In [None]:
# Download the golf-swing video; `swing` holds the file name returned by get_youtube_video.
swing = get_youtube_video('https://www.youtube.com/watch?v=ik85VFcig3o')
print(swing)

#### 2. 이미지 저장

In [None]:
# !rm -rf swing*

In [None]:
# Write the frames of the golf-swing video into ./swing and keep the reported video properties.
width, height, fps, frame_count = read_video(swing, './swing')

#### 3. 압축(다운로드 및 확인용도)

In [None]:
# Archive the extracted frames as ./swing.zip for download and inspection.
compress_folder('./swing')

#### 4. 영상 구간 이미지로 저장

In [None]:
# Swing segment 1: start_frame=442, end_frame=872, step=1, written to ./swing_1.
width, height, fps, frame_count = read_video_by_frame(swing, './swing_1', 442, 872, 1)

In [None]:
# Swing segment 2: start_frame=889, end_frame=1111, step=1, written to ./swing_2.
width, height, fps, frame_count = read_video_by_frame(swing, './swing_2', 889, 1111, 1)

#### 5. 구간 별 영상 생성(optional)

In [None]:
# Re-encode swing segment 1 as swing_split/swing_1.mp4.
# NOTE(review): 640x360 at 30.0 fps is hard-coded; presumably matches the values reported for this video -- confirm.
make_video_file('swing_1', 'swing_split', 'swing_1.mp4', 640, 360, 30.0, 1)

In [None]:
# Re-encode swing segment 2 as swing_split/swing_2.mp4.
# NOTE(review): 640x360 at 30.0 fps is hard-coded; presumably matches the values reported for this video -- confirm.
make_video_file('swing_2', 'swing_split', 'swing_2.mp4', 640, 360, 30.0, 1)

#### 6. pose estimation

In [None]:
# Output frame size and frame rate for the golf-swing pose clips below.
width, height, fps = 640, 360, 30.0

In [None]:
# Pose video for swing segment 1, using every 3rd frame image; written to swing_pose/swing_1_pose.mp4.
get_pose_video(detector, pose_net, 'swing_1', 'swing_pose', 'swing_1_pose.mp4', width, height,fps, 3)

In [None]:
# Pose video for swing segment 2, using every 3rd frame image; written to swing_pose/swing_2_pose.mp4.
get_pose_video(detector, pose_net, 'swing_2', 'swing_pose', 'swing_2_pose.mp4', width, height,fps, 3)

# 실습

## 1. 디스코 영상을 이용하여 구간별 영상을 만든다

In [None]:
# Download the disco video for the exercise; `disco` holds the file name returned by get_youtube_video.
disco = get_youtube_video('https://www.youtube.com/watch?v=Xpaw_SezTTo')
print(disco)

## 2. 디스코 구간별 영상에서 pose를 추출한다.