<a href="https://colab.research.google.com/github/anindita/dances/blob/main/run_dances.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 0: Set up HRNet utils


In [None]:
# Fetch the HRNet reference implementation; later cells cd into this checkout.
!git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git


In [None]:
%%bash

# Install the HRNet checkout's Python requirements, then run `make` inside its lib/ directory.
cd deep-high-resolution-net.pytorch/
pip install -r requirements.txt

cd lib
make

In [None]:
# Fetch the COCO API sources; the next cell runs `make install` in cocoapi/PythonAPI.
!git clone https://github.com/cocodataset/cocoapi.git

In [None]:
%%bash

# Build and install from the PythonAPI directory of the cocoapi checkout.
cd cocoapi/PythonAPI
make install


In [None]:
%%bash

# Create the working directories used by later cells:
#   output/, log/, data/mpii/images/, data/mpii/annot/
# `mkdir -p` keeps the cell re-runnable: plain `mkdir` fails when a directory
# already exists, and a failing last command makes the %%bash cell raise.
cd deep-high-resolution-net.pytorch/
mkdir -p output log data/mpii/images data/mpii/annot

In [None]:
# %%bash

# cd deep-high-resolution-net.pytorch/output
# mkdir mpii && cd mpii
# mkdir pose_hrnet
# mkdir pose_resnet

# Step 0.5: Variables


In [None]:
# Variables for 2570
import numpy as np
import cv2

# Frame size handed to cv2.resize and cv2.VideoWriter in later cells.
video_size = (320, 240)

# Scale strings written into valid.json: `init_scale` for the first/last frames,
# `scale` for every other frame (also embedded in the prediction file names).
scale = '0.6'
init_scale = '0.7'

dataset = '2570'

# One colour triple per joint index, used when drawing keypoints with cv2.circle.
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
    [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
    [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
]

In [None]:
# Variables for 3000
# NOTE(review): unlike the 2570 configuration, this block does not set
# `init_scale`, which the valid.json cell reads; define it before enabling this.
# import numpy as np
# import cv2

# video_size = (1280, 720)
# scale = '3'

# dataset = '3000'

# colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
#           [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
#           [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

# Step 1: Mount drive & download video (see cells)

Before running the cells below, add the `diving_models` [Drive folder](https://drive.google.com/drive/folders/1nDXZKhvn9TsEjeT6d2pdKsxB2afQqBsp?usp=sharing) to the top level of your `MyDrive` directory; a later cell copies it from `/content/drive/MyDrive/diving_models`.

In [None]:
# Mount Google Drive at /content/drive; a later cell copies diving_models from MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
YOUTUBE_ID = 'F_mmLMTFMag'

!pip install -q youtube-dl
!rm youtube.mp4
!youtube-dl -f 'bestvideo[ext=mp4]' --output "youtube.%(ext)s" https://www.youtube.com/watch?v=$YOUTUBE_ID
!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:00:41 -t 2 video.mp4

# Step 2: Create images from video

In [None]:
import cv2
import os

# Split video.mp4 into numbered JPEG frames under the HRNet MPII image directory.
cam = cv2.VideoCapture("video.mp4")
if not cam.isOpened():
    # Without this check a missing or unreadable clip silently produces zero
    # frames and the notebook only fails in a later cell, far from the cause.
    raise RuntimeError('Could not open video.mp4 - run the download cell first.')

# Number of frames written so far; later cells read this as the total frame count.
currentframe = 0

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            break

        name = '/content/deep-high-resolution-net.pytorch/data/mpii/images/frame' + str(currentframe) + '.jpg'
        print ('Creating...' + name)

        # Resize to 1280x720, keep the central 960 columns (160..1119), then
        # resize to the configured video_size.
        frame = cv2.resize(frame, (1280,720))
        frame = frame[0:720, 160:1120]
        frame = cv2.resize(frame, video_size)
        cv2.imwrite(name, frame)

        currentframe += 1
finally:
    # Release the capture even if resizing or writing a frame raises.
    cam.release()
cv2.destroyAllWindows()

In [None]:
# Copy the diving_models folder from the mounted Drive into the HRNet checkout.
!cp --verbose -R /content/drive/MyDrive/diving_models /content/deep-high-resolution-net.pytorch/


import json
import random

# Build data/mpii/annot/valid.json: one annotation per extracted frame, cloned
# from the first record of sample.json with only "image" and "scale" replaced.
valid = []
index = 0

with open('deep-high-resolution-net.pytorch/diving_models/sample.json') as json_data:
  data = json.load(json_data)
  img = data[0]
  for i in range(currentframe):
    new_img = img.copy()  # shallow copy is enough: only top-level keys are overwritten
    new_img["image"] = 'frame' + str(i) + '.jpg'
    new_img["scale"] = scale
    valid.append(new_img)

  # The first three and last three frames use init_scale. Negative indexing
  # starts at -1 (valid[-0] is valid[0]), so the tail is addressed as -(i + 1).
  # The range is capped so clips shorter than three frames do not raise IndexError.
  for i in range(min(3, len(valid))):
    valid[i]["scale"] = init_scale
    valid[-(i + 1)]["scale"] = init_scale

with open('deep-high-resolution-net.pytorch/data/mpii/annot/valid.json', 'w') as f:
    json.dump(valid, f, ensure_ascii=False, indent=4)

In [None]:
from scipy.io import loadmat, savemat

import math

# Inputs: the joint table stored in sample.mat and the annotations in valid.json.
gt_dict = loadmat('deep-high-resolution-net.pytorch/diving_models/sample.mat')
dataset_joints = gt_dict['dataset_joints']
file_name = 'deep-high-resolution-net.pytorch/data/mpii/annot/valid.json'

# The annotation file is read once and shared by the three sections below.
with open(file_name) as json_data:
  data = json.load(json_data)

## Missing joint data
# joint_missing[j] holds one flag per frame: 1 when joints_vis[j] == 0, else 0.
joint_missing = [[] for _ in data[0]['joints_vis']]
for record in data:
  for j, visible in enumerate(record['joints_vis']):
    joint_missing[j].append(1 if visible == 0 else 0)

## Positive ground truth sources
# ground_truth[j][0] / ground_truth[j][1] collect the x / y of joint j per frame.
ground_truth = [[[] for _ in first_joint] for first_joint in data[0]['joints']]
for record in data:
  for j, joint in enumerate(record['joints']):
    ground_truth[j][0].append(joint[0])
    ground_truth[j][1].append(joint[1])

## Headboxes
# heads[0] = bottom-left corner lists (x, y); heads[1] = top-right corner lists (x, y).
heads = [[[], []], [[], []]]
for record in data:
  # Joints 8 and 9 are the points the box is built around (neck and head in this code).
  neck_x, neck_y = record['joints'][8][0], record['joints'][8][1]
  head_x, head_y = record['joints'][9][0], record['joints'][9][1]

  # Half the neck-to-head distance, measured out from the midpoint of the two joints.
  half = math.sqrt((neck_x - head_x) ** 2 + (neck_y - head_y) ** 2) / 2
  mid_x = (neck_x + head_x) / 2
  mid_y = (neck_y + head_y) / 2

  # The min/max against the joints themselves keeps both joints inside the box.
  heads[0][0].append(int(min(mid_x - half, neck_x, head_x)))
  heads[0][1].append(int(min(mid_y - half, neck_y, head_y)))
  heads[1][0].append(int(max(mid_x + half, neck_x, head_x)))
  heads[1][1].append(int(max(mid_y + half, neck_y, head_y)))

savemat(
    "deep-high-resolution-net.pytorch/data/mpii/annot/gt_valid.mat",
    mdict={
        'dataset_joints': dataset_joints,
        'jnt_missing': joint_missing,
        'pos_gt_src': ground_truth,
        'headboxes_src': heads,
    },
)

# Step 3: Run Models

In [None]:
# Install yacs, json_tricks and tensorboardX before the model-run cells
# (presumably needed by tools/test.py, which is not visible here).
!pip install yacs
!pip install json_tricks
!pip install tensorboardX

In [None]:
%%bash

# Copy the test.py shipped in diving_models into the HRNet tools/ directory,
# where run_test invokes it as tools/test.py.
cp --verbose /content/deep-high-resolution-net.pytorch/diving_models/test.py /content/deep-high-resolution-net.pytorch/tools

In [None]:
def run_test(model_name, cfg='experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml', out='resnet/res101_256x256_d256x3_adam_lr1e-3', backbone='resnet'):
  """Run tools/test.py for a model's "best" and "final" checkpoints and keep both pred.mat files.

  For the checkpoints ``{model_name}_model_best.pth`` and
  ``{model_name}_final_state.pth`` under diving_models/, this runs
  ``python tools/test.py --cfg <cfg> TEST.MODEL_FILE <checkpoint>`` from the
  HRNet checkout and then moves ``output/mpii/pose_{out}/pred.mat`` to
  ``/content/{model_name}_{scale}_best.mat`` and
  ``/content/{model_name}_{scale}_final.mat`` respectively.

  Args:
    model_name: checkpoint file prefix; also used in the output file names.
    cfg: config path passed to ``--cfg``, relative to the HRNet checkout.
    out: path fragment appended to ``output/mpii/pose_`` to locate pred.mat.
    backbone: accepted for the caller's convenience; not used in the body.

  Reads the module-level ``scale`` for the output names. The exit status of
  the shell commands is not inspected.
  """
  
  best_path = f'/content/deep-high-resolution-net.pytorch/diving_models/{model_name}_model_best.pth'
  final_path = f'/content/deep-high-resolution-net.pytorch/diving_models/{model_name}_final_state.pth'

  !cd /content/deep-high-resolution-net.pytorch && python tools/test.py --cfg $cfg TEST.MODEL_FILE $best_path

  # Both runs write to the same pred.mat path, so it is moved away after each run.
  matlab = f'/content/deep-high-resolution-net.pytorch/output/mpii/pose_{out}/pred.mat'
  best_mat = f'/content/{model_name}_{scale}_best.mat'
  !mv $matlab $best_mat

  !cd /content/deep-high-resolution-net.pytorch && python tools/test.py --cfg $cfg TEST.MODEL_FILE $final_path
  final_mat = f'/content/{model_name}_{scale}_final.mat'
  !mv $matlab $final_mat

In [None]:
# Evaluate both HRNet widths; each pair is (checkpoint prefix, config/output name).
backbone = 'hrnet'
for prefix, out in (('w32', 'w32_256x256_adam_lr1e-3'), ('w48', 'w48_256x256_adam_lr1e-3')):
  run_test(f'{prefix}_{dataset}', f'experiments/mpii/{backbone}/{out}.yaml', f'{backbone}/{out}', backbone)

# Step 4: Model Predictions

In [None]:
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from google.colab.patches import cv2_imshow
import json


# Load the four prediction files produced by run_test (w32/w48, best/final).
model_1 = loadmat(f'/content/w32_{dataset}_{scale}_best.mat')
model_2 = loadmat(f'/content/w32_{dataset}_{scale}_final.mat')
model_3 = loadmat(f'/content/w48_{dataset}_{scale}_best.mat')
model_4 = loadmat(f'/content/w48_{dataset}_{scale}_final.mat')

# The joints correspond to the model which has that joint's highest accuracy:
# joints[j] is the index into `models` whose prediction is used for joint j.
models = [model_1, model_2, model_3, model_4]
joints = [1,2,2,2,2,1,2,1,1,1,3,1,0,0,1,3]

out = cv2.VideoWriter('model_output.mp4',cv2.VideoWriter_fourcc(*'DIVX'), 15, video_size)
results = []

for i in range(len(model_1['preds'])):

  frame_res = []
  img = cv2.imread(f'/content/deep-high-resolution-net.pytorch/data/mpii/images/frame{i}.jpg')

  for j in range(len(model_1['preds'][0])):
    model = models[joints[j]]
    # Both coordinates come from the model selected for this joint
    # (y was previously always taken from model_1).
    x = int(model['preds'][i][j][0])
    y = int(model['preds'][i][j][1])
    cv2.circle(img, (x, y), 3, colors[j], -1)
    frame_res.append([x,y])

  cv2_imshow(img)
  out.write(img)
  results.append(frame_res)

cv2.destroyAllWindows()
out.release()

# Per-frame list of [x, y] joint positions; read back by the Kalman smoothing cell.
with open('data.json', 'w') as f:
    json.dump(results, f)

# Step 4.5: Kalman Filtering

In [None]:
# tsmoothie supplies the smoother imported by the next cell.
!pip install tsmoothie

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from google.colab.patches import cv2_imshow
from tsmoothie.utils_func import sim_randomwalk
from tsmoothie.smoother import *

# Number of keypoints per frame that get smoothed below.
p = 16

with open('data.json') as json_file:
  loaded = json.load(json_file)

# Every coordinate is multiplied by 3 here; later cells divide by 3 before drawing.
scaled = np.array([[[coord * 3 for coord in point] for point in frame] for frame in loaded])

def smooth(p):
  """Kalman-smooth the x and y tracks of keypoint ``p`` across all frames.

  Reads the module-level ``scaled`` array and builds a two-row series for
  keypoint index ``p`` (row 0 = x per frame, row 1 = y per frame), then runs a
  ``KalmanSmoother`` with level and trend components over it.

  Returns:
    ``smoother.smooth_data`` for that series; later cells index it as
    ``[0][frame]`` for x and ``[1][frame]`` for y.
  """
  points = [np.ravel(frame_keypoints[p]) for frame_keypoints in scaled]
  data = np.array([[pt[0] for pt in points], [pt[1] for pt in points]])

  # Kalman Smoothing
  smoother = KalmanSmoother(component='level_trend',
                            component_noise={'level':0.5, 'trend':1})
  smoother.smooth(data)
  # The intervals are only consumed by the optional plot kept below.
  low, up = smoother.get_intervals('kalman_interval', confidence=0.05)

  # Optional: plot the first smoothed timeseries with intervals
  # plt.figure(figsize=(11,6))
  # plt.plot(smoother.smooth_data[0], linewidth=3, color='blue')
  # plt.plot(smoother.data[0], '.k')
  # plt.fill_between(range(len(smoother.data[0])), low[0], up[0], alpha=0.1)

  return smoother.smooth_data


# One smoothed (x, y) series per keypoint index, in keypoint order.
smooths = [smooth(joint_idx) for joint_idx in range(p)]


In [None]:

# The writer's frame size must match the frames passed to write(). The frames
# on disk were saved at video_size, so use it here as the other writers do
# (the previous hard-coded (960,720) did not match the saved frames).
out = cv2.VideoWriter('kalman_video.mp4',cv2.VideoWriter_fourcc(*'DIVX'), 10, video_size)
results = []
name = 'kalman'

for i, keypoints in enumerate(scaled):
  frame = cv2.imread(f'/content/deep-high-resolution-net.pytorch/data/mpii/images/frame{i}.jpg')
  frame_res = []
  for j in range(p):
    #cv2.circle(frame, (int(keypoints[j][0]), (int(keypoints[j][1]))), 5, colors[j], 2)
    # Smoothed values are in the x3 coordinate space of `scaled`; divide to
    # get back to pixel positions on the saved frame.
    x = int(smooths[j][0][i] / 3)
    y = int(smooths[j][1][i] / 3)
    cv2.circle(frame, (x, y), 3, colors[j], -1)
    frame_res.append([x,y])
  results.append(frame_res)
  out.write(frame)
  cv2_imshow(frame)

cv2.destroyAllWindows()
out.release()

# Per-frame list of smoothed [x, y] joint positions.
with open(f'{name}.json', 'w') as f:
    json.dump(results, f)

# Step 5: Optical Flow

In [None]:
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
from google.colab.patches import cv2_imshow

# Joint and frame counts derived from the smoothed tracks.
num_joints = len(smooths)
num_imgs = len(smooths[0][0])

# Parameters for lucas kanade optical flow
lk_params = {
    'winSize': (10,10),
    'maxLevel': 2,
    'criteria': (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5, 0.03),
}

def get_keypoints(frame_num):
  """Return the smoothed keypoints of frame ``frame_num`` as a float32 array.

  Each joint's smoothed x and y are divided by 3, undoing the x3 applied before
  smoothing. The result has shape (num_joints, 1, 2), which is the point
  layout this notebook passes to cv2.calcOpticalFlowPyrLK.
  """
  return np.array(
      [[[smooths[j][0][frame_num] / 3, smooths[j][1][frame_num] / 3]] for j in range(num_joints)],
      dtype=np.float32)

# Flat list of [x, y] integer points, appended joint by joint for every annotated frame.
predictions = []

def annot_img(img, keypoints):
  """Draw every keypoint on ``img`` (in place) and record it in ``predictions``.

  Args:
    img: image the filled circles are drawn onto.
    keypoints: iterable of per-joint points; each entry is flattened to (x, y).
      Entry i is drawn with ``colors[i]``.
  """
  for i, ps in enumerate(keypoints):
    x,y = ps.ravel()
    # cv2 drawing functions need integer pixel coordinates, while the
    # keypoints arrive as float32; convert once and use it for both outputs.
    centre = (int(x), int(y))
    predictions.append([centre[0], centre[1]])
    cv2.circle(img, centre, 3, colors[i], -1)

def dist(x1,y1,x2,y2):
  """Return the Euclidean distance between (x1, y1) and (x2, y2)."""
  return math.hypot(x2 - x1, y2 - y1)

def analyse_points(prev_ps, opt_ps, opt_st, curr_ps, max_extra_dist=46):
  """Pick, per joint, either the detected point or the optical-flow point.

  For each joint the detected point from ``curr_ps`` is kept unless optical
  flow produced a point (status != 0) AND the detection moved at least
  ``max_extra_dist`` further from the previous point than the flow point did.
  In that case the flow point is used instead.

  Args:
    prev_ps: previous frame's points, one entry per joint.
    opt_ps: optical-flow points for the current frame, one entry per joint.
    opt_st: optical-flow status per joint; ``opt_st[i][0] == 0`` means the
      flow point for joint i is not used.
    curr_ps: detected points for the current frame, one entry per joint.
    max_extra_dist: threshold on (detection displacement - flow displacement),
      in the same units as the points. Defaults to 46, the value that was
      previously hard-coded.

  Returns:
    float32 array with one ``[[x, y]]`` entry per joint.
  """
  keypoints = []
  for i, ps in enumerate(curr_ps):
    x, y = ps.ravel()

    if opt_st[i][0] == 0:
      # No usable flow result for this joint: fall back to the detection.
      keypoints.append([[x,y]])
      continue

    prev_x, prev_y = prev_ps[i].ravel()
    opt_x, opt_y = opt_ps[i].ravel()

    # How far each candidate moved away from the previous frame's point.
    curr_dist = dist(prev_x, prev_y, x, y)
    opt_dist = dist(prev_x, prev_y, opt_x, opt_y)

    if curr_dist - opt_dist < max_extra_dist:
      keypoints.append([[x,y]])
    else:
      keypoints.append([[opt_x, opt_y]])

  return np.array(keypoints, dtype=np.float32)


def _load_frame_with_gray(frame_index):
  """Read frame ``frame_index`` from disk; return (BGR image, filtered grayscale copy)."""
  bgr = cv2.imread(f'/content/deep-high-resolution-net.pytorch/data/mpii/images/frame{frame_index}.jpg')
  gray = cv2.filter2D(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), -1, kernel)
  return bgr, gray


# Tracking starts from the smoothed keypoints of frame 0.
p0 = get_keypoints(0)
# 3x3 kernel (centre 9, neighbours -1) applied to both grayscale frames before the flow step.
kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
out = cv2.VideoWriter('output_video.mp4',cv2.VideoWriter_fourcc(*'DIVX'), 15, video_size)

for i in range(1, num_imgs):
  _, old_gray = _load_frame_with_gray(i - 1)
  img, img_gray = _load_frame_with_gray(i)

  # Propagate the previous frame's accepted keypoints into the current frame.
  p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, img_gray, p0, None, **lk_params)

  # Choose between the smoothed detections and the flow result, then draw them.
  new_keypoints = analyse_points(p0, p1, st, get_keypoints(i))
  annot_img(img, new_keypoints)

  # The accepted points become the starting points for the next frame.
  p0 = new_keypoints.copy()

  cv2_imshow(img)
  out.write(img)

cv2.destroyAllWindows()
out.release()


with open('final_points.json', 'w') as f:
    json.dump(predictions, f)

# Optional: Zip Images

In [None]:
import shutil

# Bundle the extracted frames directory into images.zip in the current working directory.
shutil.make_archive("images",'zip',"/content/deep-high-resolution-net.pytorch/data/mpii/images")