In [9]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import json
import imageio as io

import argparse

from bpe.functional.motion import preprocess_motion2d_rc, cocopose2motion, json2annotations, annotations2motion

from bpe import Config

from bpe.similarity_analyzer import SimilarityAnalyzer

from bpe.functional.motion import preprocess_motion2d_rc, cocopose2motion

from bpe.functional.utils import pad_to_height

from bpe.functional.visualization import preprocess_sequence, video_out_with_imageio

# ---------------------------------------------------------------------------
# Notebook configuration.
# These module-level values are forwarded to the argparse parser below so that
# the notebook can build the same `args` namespace a command-line run would.
# ---------------------------------------------------------------------------
data_dir = 'bpe-datasets/SARA_released'
model_path = 'train_log/exp_bpe/model/model_best.pth.tar'

# Video paths are passed as empty strings: the parser requires the options,
# but no video file path is supplied in this notebook.
video1 = ''
video2 = ''

# Pose annotation JSON files to compare.
# Alternative inputs previously used:
# v1 = 'bpe-datasets/refined_skeleton/007/S001C001P004R001A007.json'
# v2 = 'bpe-datasets/refined_skeleton/007/S001C001P005R002A007.json'
v1 = 'attack_pose/json_files/GX010032_Clip_10_sec_24_POSE_tabel.json'
v2 = 'attack_pose/json_files/GX010033_Clip_6_sec_14_POSE_tabel.json'

# Frame dimensions of each source video and the start-frame offset for video 2.
img1_height = 1080
img1_width = 1920
img2_height = 1080
img2_width = 1920
pad2 = 0

# ---------------------------------------------------------------------------
# Argument parser.
# Required options carry no `default=`: argparse errors out before a default
# on a required option could ever be used, so such a default is dead code.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()

# task / input locations
parser.add_argument('--name', type=str, default="sim_test", help="task name")
parser.add_argument('--data_dir', required=True, help="path to dataset dir")
parser.add_argument('--model_path', type=str, required=True, help="filepath for trained model weights")
parser.add_argument('--video1', type=str, required=True, help="video1 mp4 path")
parser.add_argument('--video2', type=str, required=True, help="video2 mp4 path")
parser.add_argument('-v1', '--vid1_json_dir', type=str, required=True, help="video1's coco annotation json")
parser.add_argument('-v2', '--vid2_json_dir', type=str, required=True, help="video2's coco annotation json")

# frame geometry
parser.add_argument('-h1', '--img1_height', type=int, help="video1's height", default=480)
parser.add_argument('-w1', '--img1_width', type=int, help="video1's width", default=854)
parser.add_argument('-h2', '--img2_height', type=int, help="video2's height", default=480)
parser.add_argument('-w2', '--img2_width', type=int, help="video2's width", default=854)
parser.add_argument('-pad2', '--pad2', type=int, help="video2's start frame padding", default=0)

# runtime / output
parser.add_argument('-g', '--gpu_ids', type=int, default=0)
parser.add_argument('--out_path', type=str, default='./visual_results')
parser.add_argument('--out_filename', type=str, default='twice.mp4')

# model / augmentation switches
parser.add_argument('--use_flipped_motion', action='store_true',
                    help="whether to use one decoder per one body part")
parser.add_argument('--use_invisibility_aug', action='store_true',
                    help="change random joints' visibility to invisible during training")
parser.add_argument('--debug', action='store_true', help="limit to 500 frames")

# related to video processing
parser.add_argument('--video_sampling_window_size', type=int, default=16,
                    help='window size to use for similarity prediction')
parser.add_argument('--video_sampling_stride', type=int, default=16,
                    help='stride determining when to start next window of frames')
parser.add_argument('--use_all_joints_on_each_bp', action='store_true',
                    help="using all joints on each body part as input, as opposed to particular body part")

# similarity measurement / visualisation
parser.add_argument('--similarity_measurement_window_size', type=int, default=1,
                    help='measuring similarity over # of oversampled video sequences')
parser.add_argument('--similarity_distance_metric', choices=["cosine", "l2"], default="cosine")
parser.add_argument('--privacy_on', action='store_true',
                    help='when on, no original video or sound in present in the output video')
parser.add_argument('--thresh', type=float, default=0.5, help='threshold to seprate positive and negative classes')
parser.add_argument('--connected_joints', action='store_true', help='connect joints with lines in the output video')

# Explicit argv so parsing never touches the kernel's own sys.argv.
# Integer settings are stringified because argparse expects string tokens.
notebook_argv = [
    '--data_dir', data_dir,
    '--model_path', model_path,
    '--video1', video1,
    '--video2', video2,
    '-v1', v1,
    '-v2', v2,
    '--img1_height', str(img1_height),
    '--img1_width', str(img1_width),
    '--img2_height', str(img2_height),
    '--img2_width', str(img2_width),
    '--pad2', str(pad2),
]

args = parser.parse_args(notebook_argv)

In [10]:
# Compute per-window motion similarity between the two pose sequences named in `args`.
#
# Only the final expression of a notebook cell is displayed, so this cell ends
# with `motion_similarity_per_window` and carries no intermediate bare expressions.
# Nothing is written to disk here, so no output directory is created.

# Pose mean/std arrays, read from the dataset directory held in `args` (the same
# namespace that is passed to Config below) rather than a separate module-level path.
mean_pose_bpe = np.load(os.path.join(args.data_dir, 'meanpose_rc_with_view_unit64.npy'))
std_pose_bpe = np.load(os.path.join(args.data_dir, 'stdpose_rc_with_view_unit64.npy'))

config = Config(args)
similarity_analyzer = SimilarityAnalyzer(config, args.model_path)

# Derive a scale factor for each video from its frame size and config.img_size[0].
# for NTU-RGB test - it used w:1920, h:1080
h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
h2, w2, scale2 = pad_to_height(config.img_size[0], args.img2_height, args.img2_width)

# get input suitable for motion similarity analyzer
seq1 = cocopose2motion(config.unique_nr_joints, args.vid1_json_dir, scale=scale1,
                       visibility=args.use_invisibility_aug)
# The second sequence is sliced from index `pad2` along its third axis
# (presumably the frame axis, given the option's "start frame padding" help text - TODO confirm).
seq2 = cocopose2motion(config.unique_nr_joints, args.vid2_json_dir, scale=scale2,
                       visibility=args.use_invisibility_aug)[:, :, args.pad2:]

# NOTE: seq1 / seq2 are rebound to their preprocessed versions from here on.
seq1 = preprocess_sequence(seq1)
seq2 = preprocess_sequence(seq2)

# Build the model inputs using the mean/std pose arrays loaded above.
seq1_origin = preprocess_motion2d_rc(seq1, mean_pose_bpe, std_pose_bpe,
                                     invisibility_augmentation=args.use_invisibility_aug,
                                     use_all_joints_on_each_bp=args.use_all_joints_on_each_bp)
seq2_origin = preprocess_motion2d_rc(seq2, mean_pose_bpe, std_pose_bpe,
                                     invisibility_augmentation=args.use_invisibility_aug,
                                     use_all_joints_on_each_bp=args.use_all_joints_on_each_bp)

# Move both inputs to the device selected by the config.
seq1_origin = seq1_origin.to(config.device)
seq2_origin = seq2_origin.to(config.device)

# get embeddings
seq1_features = similarity_analyzer.get_embeddings(seq1_origin,
                                                   video_window_size=args.video_sampling_window_size,
                                                   video_stride=args.video_sampling_stride)
seq2_features = similarity_analyzer.get_embeddings(seq2_origin,
                                                   video_window_size=args.video_sampling_window_size,
                                                   video_stride=args.video_sampling_stride)

# get motion similarity
motion_similarity_per_window = similarity_analyzer.get_similarity_score(
    seq1_features, seq2_features,
    similarity_window_size=args.similarity_measurement_window_size)

# Displayed as the cell result.
motion_similarity_per_window

Building model
Loading model from train_log/exp_bpe/model/model_best.pth.tar
Model is ready


[{'ra': 0.9466005563735962,
  'la': 0.45262736082077026,
  'rl': 0.9769474267959595,
  'll': 0.9909589290618896,
  'torso': 0.6687353253364563},
 {'ra': 0.9531256556510925,
  'la': 0.2960130274295807,
  'rl': 0.9530953764915466,
  'll': 0.9929720759391785,
  'torso': 0.5830177068710327},
 {'ra': 0.9106404185295105,
  'la': 0.7238216400146484,
  'rl': 0.958930492401123,
  'll': 0.9873378872871399,
  'torso': 0.3882153034210205},
 {'ra': 0.708549976348877,
  'la': 0.504763662815094,
  'rl': 0.9712213277816772,
  'll': 0.9660616517066956,
  'torso': 0.8233088254928589},
 {'ra': 0.8118758201599121,
  'la': 0.9140557050704956,
  'rl': 0.9199668169021606,
  'll': 0.9897566437721252,
  'torso': 0.9306679964065552},
 {'ra': 0.9598767161369324,
  'la': 0.6277140378952026,
  'rl': 0.921382486820221,
  'll': 0.9378729462623596,
  'torso': 0.913017213344574},
 {'ra': 0.8186538219451904,
  'la': 0.7713695764541626,
  'rl': 0.6813949346542358,
  'll': 0.966238260269165,
  'torso': 0.9286928176879883