In [None]:
import sys
from glob import glob
from os import path as osp
from datetime import datetime
from tqdm import tqdm
from skimage import io, transform
from matplotlib import pyplot as plt
import numpy as np
from math import sqrt
from statistics import mean
# torch imports
import torch
from torchvision import transforms
from torch.utils.data.dataloader import DataLoader

# root path of project
from os import path as osp
import sys

# get root directory
import re
# Greedy match: everything up to and including the last '/AquaPose' component of the path.
reg = '^.*/AquaPose'
# The path searched is the directory of sys.argv[0]; [0] raises IndexError when it does not
# contain '/AquaPose'.
# NOTE(review): inside a Jupyter kernel sys.argv[0] is usually the kernel launcher script rather
# than this notebook - confirm this resolves to the repository in your environment.
project_root = re.findall(reg, osp.dirname(osp.abspath(sys.argv[0])))[0]
# Make the project packages (lib.*, references.*) importable by the cells below.
sys.path.append(project_root)

from lib.dataset.PoseDataset import PoseDataset

from lib.models.keypoint_rcnn import get_resnet50_pretrained_model

# utils
from lib.utils.slack_notifications import slack_message
from lib.utils.select_gpu import select_best_gpu
from lib.utils.rmsd import kabsch_rmsd, kabsch_rotate, kabsch_weighted_rmsd, centroid, centroid_weighted, rmsd, rmsd_weighted

# references import
# source: https://github.com/pytorch/vision/tree/master/references/detection
from references.engine import train_one_epoch, evaluate
from references.utils import collate_fn

from references.transforms import RandomHorizontalFlip

from lib.matching.matching import *
from lib.utils.visual_utils import *

## Load dataset with reference poses

In [None]:
# Reference poses to match against: freestyle recordings 1-4.
ref_dataset = PoseDataset(
    [osp.join(project_root, rel_dir) for rel_dir in (
        'data/vzf/freestyle/freestyle_1',
        'data/vzf/freestyle/freestyle_2',
        'data/vzf/freestyle/freestyle_3',
        'data/vzf/freestyle/freestyle_4',
    )],
    train=False,
)

# Held-out recordings 5-6, used as query images.
test_dataset = PoseDataset(
    [osp.join(project_root, rel_dir) for rel_dir in (
        'data/vzf/freestyle/freestyle_5',
        'data/vzf/freestyle/freestyle_6',
    )],
    train=False,
)

## Load model and weights for predictions

In [None]:
# Build the keypoint model and restore a checkpoint from <project_root>/weights.
weight_dir = osp.join(project_root, 'weights')
# glob() returns matches in arbitrary order; sort so that "the last file" is the
# lexicographically last one on every machine and every run.
weight_files = sorted(glob(osp.join(weight_dir, '*')))
model = get_resnet50_pretrained_model()
print(weight_files)
if not weight_files:
    # Fail with an explicit message instead of an IndexError on weight_files[-1]
    raise FileNotFoundError('no weight files found in {}'.format(weight_dir))
# map_location keeps the checkpoint loadable on machines without a GPU
model.load_state_dict(torch.load(weight_files[-1], map_location=torch.device('cpu')))


## Display utils

In [None]:
def tensor_to_numpy_image(img_tensor):
    """Convert a 3-D image tensor to a numpy array with the first axis moved last.

    The tensor is permuted with (1, 2, 0), detached from the autograd graph and
    copied to host memory, so tensors living on an accelerator or requiring
    gradients are handled as well.

    Args:
        img_tensor: 3-D torch tensor (presumably channels x height x width).

    Returns:
        numpy.ndarray with axes ordered (dim1, dim2, dim0) of the input.
    """
    # .cpu() is a no-op for CPU tensors; .numpy() would raise for a device tensor without it
    return img_tensor.permute(1, 2, 0).detach().cpu().numpy()

def get_max_prediction(prediction):
    """Return the highest scoring detection of the first image in a model output.

    Args:
        prediction: sequence whose first element is a dict with the tensors
            'boxes', 'scores', 'keypoints' and 'keypoints_scores', all indexed
            by detection along their first axis.

    Returns:
        Tuple (box, keypoints, keypoints_scores) of numpy arrays belonging to the
        detection with the largest entry in 'scores'.

    Raises:
        ValueError: if the prediction contains no detections (the previous
            implementation failed with an unrelated UnboundLocalError or
            AttributeError in that case).
    """
    detections = prediction[0]
    scores = detections['scores']

    if len(scores) == 0:
        raise ValueError('prediction contains no detections')

    # max() returns the first of equally scored detections, matching the
    # strict '>' comparison of the original linear scan.
    best = max(range(len(scores)), key=lambda idx: scores[idx].item())

    return (
        detections['boxes'][best].detach().cpu().numpy(),
        detections['keypoints'][best].detach().cpu().numpy(),
        detections['keypoints_scores'][best].detach().cpu().numpy(),
    )


def plot_image_with_kps(img_tensor, kps_list, color_list=['b', 'r', 'g']):
    """Show an image in a new figure and scatter one or more keypoint sets on top.

    Args:
        img_tensor: image tensor, converted with tensor_to_numpy_image.
        kps_list: list of keypoint collections; column 0 is used as x and
            column 1 as y of every collection.
        color_list: matplotlib colours, paired with kps_list by position.
            Pairing stops at the shorter of the two lists.
    """
    fig, ax = plt.subplots()
    plt.imshow(tensor_to_numpy_image(img_tensor))
    for points, colour in zip(kps_list, color_list):
        xy = np.array(points)
        ax.scatter(xy[:, 0], xy[:, 1], s=10, marker='.', c=colour)

## Matching specific functions

In [None]:
def merge_head(kps):
    """Drop the first four entries of a keypoint sequence.

    Callers describe this as merging nose, eyes and ears into a single head
    point: presumably the input follows the 17-point COCO person order, so the
    entry at index 4 is what remains for the head (13 entries in total).
    """
    n_dropped = 4
    return kps[n_dropped:]

def filter_kps(pred_kps, ref_kps, scores, min_score=0, occluded=True, side = None, filter_lr_confusion=False):
    """Select the keypoint indices that should take part in pose matching.

    Indices refer to the merged keypoint layout (see the index comments below).

    Args:
        pred_kps: predicted keypoints as a numpy array, x in column 0.
        ref_kps: reference keypoints as a numpy array; column 2 is the
            visibility flag consulted when occluded is False.
        scores: numpy array with one score per predicted keypoint.
        min_score: only keypoints with a score strictly above this are kept.
        occluded: when False, keep only keypoints whose reference visibility
            flag is greater than 0.
        side: 'left' or 'right' restricts the result to that body side
            (index 0 is kept for both); None keeps both sides.
        filter_lr_confusion: when True, drop the far elbow/wrist if it was
            predicted almost on top of its counterpart on the other side.

    Returns:
        1-D numpy array with the indices of the keypoints to use.
    """
    keep = np.argwhere(scores > min_score).flatten()

    if filter_lr_confusion:
        # upper body: head, left shoulder, right shoulder
        upper_visible = np.intersect1d([0, 1, 2], keep)
        # lower body: left hip, right hip, left knee, right knee
        lower_visible = np.intersect1d([7, 8, 9, 10], keep)

        # the orientation can only be estimated with at least one joint of each group
        if len(upper_visible) > 0 and len(lower_visible) > 0:
            upper_x = mean([kp[0] for kp in pred_kps[upper_visible]])
            lower_x = mean([kp[0] for kp in pred_kps[lower_visible]])
            # upper body left of the lower body in the image -> orientation 'left'
            faces_left = upper_x < lower_x

            # (left_elbow, right_elbow), (left_wrist, right_wrist)
            for left_idx, right_idx in ((3, 4), (5, 6)):
                if left_idx in keep and right_idx in keep:
                    # both joints predicted (almost) at the same spot: keep only one of them
                    if rmsd_weighted(pred_kps[left_idx], pred_kps[right_idx], weights=[1]) < 5:
                        drop = right_idx if faces_left else left_idx
                        keep = keep[keep != drop]

    if not occluded:
        visible_in_ref = np.argwhere(ref_kps[:, 2] > 0).flatten()
        keep = np.intersect1d(keep, visible_in_ref)

    side_indices = {
        'left': [0, 1 ,3 ,5 ,7 ,9, 11, 13, 15],
        'right': [0, 2, 4, 6, 8, 10, 12, 14, 16],
    }
    if side in ('left', 'right'):
        keep = np.intersect1d(keep, side_indices[side])

    return keep

def do_kabsch_transform(pred_kps, ref_kps, translat_weights=None, filter_ind=None):
    """Align predicted keypoints to reference keypoints by translation and rotation.

    All arguments are unfiltered (one entry per keypoint); the selection given
    by filter_ind is applied inside this function.

    Args:
        pred_kps: predicted keypoints, x/y in the first two columns.
        ref_kps: reference keypoints, same length as pred_kps.
        translat_weights: optional per-keypoint weights for the weighted
            centroids; defaults to 1 for every keypoint.
        filter_ind: optional index array of the keypoints to use; defaults
            to all keypoints.

    Returns:
        The selected predicted keypoints after centring, rotation with
        kabsch_rotate and translation onto the weighted reference centroid.
        Rows correspond to filter_ind; the third column is set to 1 before
        the alignment.

    TODO: swap left/right keypoints to handle the same pose in the opposite
    direction (there is no scaling, so Kabsch cannot mirror), and use the
    reflected pose when it gives a smaller distance.
    """
    assert len(pred_kps) == len(ref_kps)

    pred_arr = np.array(pred_kps)
    ref_arr = np.array(ref_kps)

    if translat_weights is not None:
        assert len(pred_arr) == len(translat_weights)
        weight_arr = np.array(translat_weights)
    else:
        weight_arr = np.array([1] * len(ref_kps))

    if filter_ind is None:
        filter_ind = np.array(list(range(len(ref_arr))))

    def with_unit_third_column(rows):
        # keep x and y, replace the third column by a constant 1
        return np.array([[row[0], row[1], 1] for row in rows])

    moving = with_unit_third_column(pred_arr[filter_ind])
    target = with_unit_third_column(ref_arr[filter_ind])
    selected_weights = weight_arr[filter_ind]

    # centre both point sets on their weighted centroids, rotate, then move the
    # result back to where the reference centroid was
    target_centroid = centroid_weighted(target, selected_weights)
    target = target - target_centroid
    moving = moving - centroid_weighted(moving, selected_weights)

    return kabsch_rotate(moving, target) + target_centroid

def get_kabsch_distance(pred_kps, ref_kps, filter_ind = None, translat_weights=None, pose_similarity_weights=None):
    """Weighted RMSD between Kabsch-aligned predicted keypoints and the reference.

    pred_kps, ref_kps and both weight sequences are unfiltered (one entry per
    keypoint); filter_ind selects the keypoints that are compared.

    Args:
        pred_kps: predicted keypoints.
        ref_kps: reference keypoints, same length as pred_kps.
        filter_ind: optional index array of keypoints to use (default: all).
        translat_weights: optional per-keypoint weights forwarded to
            do_kabsch_transform (default: all ones).
        pose_similarity_weights: optional per-keypoint weights for the RMSD
            (default: all ones). Lists are accepted as well as arrays.

    Returns:
        Whatever rmsd_weighted returns for the aligned prediction and the
        selected reference rows.
    """
    assert len(pred_kps) == len(ref_kps)

    ref_kps_np = np.array(ref_kps)

    if translat_weights is not None:
        assert len(pred_kps) == len(translat_weights)

    if pose_similarity_weights is None:
        pose_similarity_weights = np.array([1] * len(ref_kps))
    else:
        # a plain list cannot be indexed with an index array below
        pose_similarity_weights = np.asarray(pose_similarity_weights)

    if filter_ind is None:
        filter_ind = np.arange(len(ref_kps_np))

    # NOTE(review): Q keeps the raw third column of ref_kps while the aligned P carries a
    # constant there - confirm that rmsd_weighted is meant to include that column.
    Q = ref_kps_np[filter_ind]

    P = do_kabsch_transform(pred_kps, ref_kps, filter_ind=filter_ind, translat_weights=translat_weights)
    return rmsd_weighted(P, Q, weights=pose_similarity_weights[filter_ind])

# DEPRECATED
# def get_affine_tf(pred_kps, ref_kps):
#     # make sure the visibility flag is 1 always (necessary for tf)
#     ref_kps_vis = [[kp[0], kp[1], 1] for kp in ref_kps]

#     A, res, rank, s = np.linalg.lstsq(pred_kps, ref_kps_vis)
#     return A

# def warp_kp(kps, tf_mat):
#     return np.dot(kps, tf_mat)

def kabsch_similarity_score(pred_kps, ref_kps, filter_ind=None, translat_weights=None, threshold_pct = 0.3 , weights = None):
    """Align pred_kps to ref_kps with do_kabsch_transform and score the result.

    Args:
        pred_kps: unfiltered predicted keypoints.
        ref_kps: unfiltered reference keypoints.
        filter_ind: optional indices of the keypoints to use.
        translat_weights: optional per-keypoint weights for the alignment.
        threshold_pct: distance threshold passed on to similarity_score.
        weights: optional per-keypoint weights passed on to similarity_score.

    Returns:
        The value of similarity_score for the aligned keypoints.
    """
    aligned_kps = do_kabsch_transform(
        pred_kps, ref_kps, translat_weights=translat_weights, filter_ind=filter_ind
    )
    return similarity_score(
        aligned_kps, ref_kps, filter_ind=filter_ind, threshold_pct=threshold_pct, weights=weights
    )

def similarity_score(kabsch_kps, ref_kps, filter_ind = None , threshold_pct = 0.3 , weights = None):
    """Score how close aligned keypoints lie to their reference keypoints.

    Every selected keypoint contributes weight * max(max_dist - dist, 0) / max_dist,
    where dist is its x/y distance to the reference keypoint and max_dist is
    threshold_pct times the reference torso length (distance between the
    midpoint of reference keypoints 1, 2 and the midpoint of keypoints 7, 8,
    commented in this file as shoulders and hips).

    All arguments are unfiltered EXCEPT kabsch_kps, which is expected to be
    filtered already (do_kabsch_transform returns only the rows of filter_ind).

    Args:
        kabsch_kps: aligned keypoints, one row per entry of filter_ind.
        ref_kps: unfiltered reference keypoints (at least 9 rows).
        filter_ind: optional indices of the reference keypoints that
            correspond to the rows of kabsch_kps (default: all).
        threshold_pct: fraction of the torso length beyond which a keypoint
            scores 0.
        weights: optional unfiltered per-keypoint weights (default: all 1).

    Returns:
        The weighted score as a float. 0.0 is returned when nothing is selected
        or when the distance scale is not positive (zero torso length or
        threshold_pct <= 0), which previously raised ZeroDivisionError.
    """
    kabsch_kps_np = np.array(kabsch_kps)
    ref_kps_np = np.array(ref_kps)

    if weights is None:
        weights = np.ones(len(ref_kps_np))

    if filter_ind is None:
        filter_ind = np.arange(len(ref_kps_np))

    # torso length: distance between the mean of keypoints 1, 2 and the mean of keypoints 7, 8
    shoulder_mid = ref_kps_np[[1, 2], :2].mean(axis=0)
    hip_mid = ref_kps_np[[7, 8], :2].mean(axis=0)
    torso_length = float(np.linalg.norm(shoulder_mid - hip_mid))

    # distance at which a keypoint stops contributing to the score
    max_dist = threshold_pct * torso_length

    ref_kps_ftrd = ref_kps_np[filter_ind]
    weights_ftrd = np.asarray(weights)[filter_ind]

    assert len(kabsch_kps_np) == len(ref_kps_ftrd)

    # Degenerate reference (e.g. shoulders and hips on the same point): no usable scale
    if len(ref_kps_ftrd) == 0 or max_dist <= 0:
        return 0.0

    dists = np.linalg.norm(kabsch_kps_np[:, :2] - ref_kps_ftrd[:, :2], axis=1)
    closeness = np.clip(max_dist - dists, 0, None) / max_dist

    return float(np.sum(weights_ftrd * closeness))

In [None]:
def _score_reference_pose(pred_kps_merged, pred_scores_merged, ref_kps_merged,
    min_score, occluded, filter_lr_confusion, threshold_pct, translat_weights, kp_weights):
    """Filter the keypoints for one reference pose and return its Kabsch similarity score."""
    filter_ind = filter_kps(pred_kps_merged, ref_kps_merged, pred_scores_merged,
        min_score=min_score,
        occluded=occluded,
        filter_lr_confusion=filter_lr_confusion
    )
    return kabsch_similarity_score(pred_kps_merged, ref_kps_merged,
        filter_ind=filter_ind,
        translat_weights=translat_weights,
        threshold_pct=threshold_pct,
        weights=kp_weights
    )


def _plot_reference_match(ref_img, pred_kps_merged, pred_scores_merged, ref_kps_merged,
    min_score, occluded, filter_lr_confusion, translat_weights):
    """Plot the aligned prediction (red) and the used reference keypoints (black) on ref_img."""
    filter_ind = filter_kps(pred_kps_merged, ref_kps_merged, pred_scores_merged,
        min_score=min_score,
        occluded=occluded,
        filter_lr_confusion=filter_lr_confusion
    )
    kabsch_kps = do_kabsch_transform(pred_kps_merged, ref_kps_merged,
        translat_weights=translat_weights,
        filter_ind=filter_ind
    )
    plot_image_with_kps(ref_img, [kabsch_kps, ref_kps_merged[filter_ind]], ['r', 'k'])


def get_most_similar_ind_and_scores(pred_kps, pred_scores, ref_dataset,
    num=10,
    plot=False,
    min_score=0,
    filter_lr_confusion=False,
    occluded=False,
    threshold_pct= 0.3,
    translat_weights = None,
    kp_weights = None):
    """Rank the poses of ref_dataset by similarity to a predicted pose.

    Every reference pose is compared twice, as annotated and horizontally
    flipped, and the better of the two scores is kept.

    Args:
        pred_kps: predicted keypoints before merge_head is applied.
        pred_scores: per-keypoint scores belonging to pred_kps.
        ref_dataset: iterable and indexable dataset of (image, target) pairs;
            target['keypoints'][0] holds the reference keypoints as a tensor.
        num: number of best matches to return.
        plot: when True, print and plot each of the returned matches.
        min_score, filter_lr_confusion, occluded: forwarded to filter_kps.
        threshold_pct, kp_weights: forwarded to the similarity score.
        translat_weights: forwarded to the Kabsch alignment.

    Returns:
        Tuple (most_similar_ind, most_similar_scores, flipped):
        most_similar_ind are dataset indices sorted from best to worst match
        (at most num), most_similar_scores the matching scores in the same
        order. flipped has one boolean per dataset entry IN DATASET ORDER
        (not reordered, not truncated), True where the flipped reference
        scored at least as well as the unflipped one.
    """
    # merge left/right ear, left/right eye and nose into head
    pred_kps_merged = merge_head(pred_kps)
    pred_scores_merged = merge_head(pred_scores)

    sim_scores = []
    # True where the best score was achieved by flipping the reference
    flipped = []

    # probability 1.0: the "random" flip is always applied
    flip = RandomHorizontalFlip(1.0)

    # iterating the dataset directly lets tqdm pick up its length for the progress bar
    for ref_img, ref_target in tqdm(ref_dataset):
        # Read the unflipped annotation BEFORE flipping: the flip transform may
        # modify the target it is given (NOTE(review): confirm for references.transforms).
        ref_kps_merged = merge_head(ref_target['keypoints'][0].detach().numpy())

        _, ref_target_flipped = flip(ref_img, ref_target)
        ref_kps_flipped_merged = merge_head(ref_target_flipped['keypoints'][0].detach().numpy())

        score = _score_reference_pose(pred_kps_merged, pred_scores_merged, ref_kps_merged,
            min_score, occluded, filter_lr_confusion, threshold_pct, translat_weights, kp_weights)
        score_flipped = _score_reference_pose(pred_kps_merged, pred_scores_merged, ref_kps_flipped_merged,
            min_score, occluded, filter_lr_confusion, threshold_pct, translat_weights, kp_weights)

        # keep the best score and remember whether it came from the flipped pose
        if score > score_flipped:
            sim_scores.append(score)
            flipped.append(False)
        else:
            sim_scores.append(score_flipped)
            flipped.append(True)

    # indices and scores of the most similar poses, best first
    most_similar_ind = np.argsort(sim_scores)[::-1][:num]
    most_similar_scores = np.array(sim_scores)[most_similar_ind]
    most_similar_flipped = np.array(flipped)[most_similar_ind]

    if plot:
        for ind, score, is_flipped in zip(most_similar_ind, most_similar_scores, most_similar_flipped):

            print('dataset index: {}, score: {}, flipped: {}'.format(ind,score,is_flipped))

            ref_img, ref_target = ref_dataset[ind]
            if is_flipped:
                # show the match on the flipped image with the flipped annotation
                ref_img, ref_target = flip(ref_img, ref_target)

            ref_kps_merged = merge_head(ref_target['keypoints'][0].detach().numpy())
            _plot_reference_match(ref_img, pred_kps_merged, pred_scores_merged, ref_kps_merged,
                min_score, occluded, filter_lr_confusion, translat_weights)

    return most_similar_ind, most_similar_scores, flipped

In [None]:
# Run the model on one test image and keep its best detection.
test_id = 15
test_img, test_target = test_dataset[test_id]
model.eval()
# Inference only: no_grad avoids recording an autograd graph for the forward pass
with torch.no_grad():
    prediction = model([test_img])
pred_box, pred_kps, pred_scores = get_max_prediction(prediction)

In [None]:
# All predicted keypoints in black ('k'), those with a positive keypoint score in white ('w').
# Only two keypoint sets are passed, so the third colour ('r') is never used.
plot_image_with_kps(test_img, [pred_kps, pred_kps[pred_scores > 0]], ['k', 'w', 'r'])

In [None]:
# For aligning give a lot of weight to head and hips
# Both weight vectors have 13 entries, one per keypoint left after merge_head.
# Presumably ordered head, shoulders, elbows, wrists, hips, knees, ankles (left before right),
# following the index comments in filter_kps - TODO confirm for the last two pairs.
t_weights = np.array([10, 5, 5, 3, 3, 1, 1, 10, 10, 3, 3, 1, 1])

# Weights used when scoring the similarity (largest values at indices 5 and 6).
kp_weights = np.array([3, 3, 3, 6, 6, 10, 10, 3, 3, 2, 2, 1, 1])

# Plot the 10 best matching reference poses; the returned values are not needed here.
_, _, _, = get_most_similar_ind_and_scores(pred_kps, pred_scores, ref_dataset,
    num=10,
    plot=True,
    min_score=0,
    filter_lr_confusion=True,
    occluded=True,
    threshold_pct= 0.3,
    translat_weights = t_weights,
    kp_weights = kp_weights)

In [None]:
def flip_coco_person_keypoints(kps, width):
    """Mirror 17-point person keypoints horizontally.

    Args:
        kps: array or tensor indexed as [instance, keypoint, (x, y, visibility)]
            with 17 keypoints per instance.
        width: image width; every x becomes width - x.

    Returns:
        A new array/tensor in which the keypoints at indices (1, 2), (3, 4), ...,
        (15, 16) have swapped places, x is mirrored, and every keypoint with
        visibility 0 is zeroed completely. The input is left untouched.
    """
    # index 0 stays in place, every following pair of indices is exchanged
    mirror_order = [0]
    for first in range(1, 17, 2):
        mirror_order += [first + 1, first]

    # indexing with a list produces a copy, so kps itself is not modified below
    mirrored = kps[:, mirror_order]
    mirrored[..., 0] = width - mirrored[..., 0]

    # Maintain COCO convention that if visibility == 0, then x, y = 0
    invisible = mirrored[..., 2] == 0
    mirrored[invisible] = 0
    return mirrored

In [None]:
# Same test image as before, but mirrored horizontally before running the model.
test_id = 15
test_img, test_target = test_dataset[test_id]
# probability 1.0: the flip is always applied
test_img_flipped, test_target_flipped = RandomHorizontalFlip(1.0)(test_img, test_target)
ref_kps_flipped = test_target_flipped['keypoints'][0].detach().numpy()
model.eval()
# Inference only: no_grad avoids recording an autograd graph for the forward pass
with torch.no_grad():
    prediction = model([test_img_flipped])
pred_box, pred_kps, pred_scores = get_max_prediction(prediction)

In [None]:
# Black: all predicted keypoints, white: those with a positive keypoint score,
# red: the flipped ground-truth annotation.
plot_image_with_kps(test_img_flipped, [pred_kps, pred_kps[pred_scores > 0], ref_kps_flipped], ['k', 'w', 'r'])

In [None]:
# For aligning give a lot of weight to head and hips
# (same 13-entry weight vectors as in the earlier matching cell)
t_weights = np.array([10, 5, 5, 3, 3, 1, 1, 10, 10, 3, 3, 1, 1])

# Weights used when scoring the similarity (largest values at indices 5 and 6).
kp_weights = np.array([3, 3, 3, 6, 6, 10, 10, 3, 3, 2, 2, 1, 1])

# Plot the 50 best matching reference poses for the current pred_kps
# (the prediction on the flipped image when the cells are run in order).
_,_,_ = get_most_similar_ind_and_scores(pred_kps, pred_scores, ref_dataset,
    num=50,
    plot=True,
    min_score=0,
    filter_lr_confusion=True,
    occluded=True,
    threshold_pct= 0.3,
    translat_weights = t_weights,
    kp_weights = kp_weights)

In [None]:
# Debug cell: align the current prediction with the flipped annotation of test sample 15
# and print every intermediate value.
ref_img, ref_target = test_dataset[15]
ref_img_flipped, ref_target_flipped = RandomHorizontalFlip(1.0)(ref_img, ref_target)
# note: ref_kps holds the FLIPPED annotation here
ref_kps = ref_target_flipped['keypoints'][0].detach().numpy()
ref_kps_merged = merge_head(ref_kps)

# pred_kps / pred_scores are whatever the last executed prediction cell left behind
pred_kps_merged = merge_head(pred_kps)
pred_scores_merged = merge_head(pred_scores)
# filter again
filter_ind = filter_kps(pred_kps_merged, ref_kps_merged, pred_scores_merged, 
    min_score= 0,
    occluded = True,
    filter_lr_confusion = True
)
print(pred_kps_merged)
print(pred_scores_merged)
print(ref_kps_merged)
print(filter_ind)
# get kabsch kps
kabsch_kps = do_kabsch_transform(pred_kps_merged, ref_kps_merged, 
    translat_weights=t_weights,
    filter_ind=filter_ind
)

# red: aligned prediction, black: reference keypoints that were used, white: raw prediction
plot_image_with_kps(ref_img_flipped, [kabsch_kps, ref_kps_merged[filter_ind], pred_kps_merged], ['r', 'k','w'])

In [None]:
# Same alignment as in the previous cell, but against the UNFLIPPED annotation.
# Re-fetch the sample: ref_target was passed through the flip transform above, which may
# have modified it (NOTE(review): confirm for references.transforms).
ref_img, ref_target = test_dataset[15]
ref_kps = ref_target['keypoints'][0].detach().numpy()
ref_kps_merged = merge_head(ref_kps)

# filter again
# The settings are spelled out explicitly (same values as in the previous cell); the names
# min_score, occluded, filter_lr_confusion and translat_weights do not exist at notebook
# scope and raised NameError here.
filter_ind = filter_kps(pred_kps_merged, ref_kps_merged, pred_scores_merged,
    min_score= 0,
    occluded = True,
    filter_lr_confusion = True
)

# get kabsch kps
kabsch_kps = do_kabsch_transform(pred_kps_merged, ref_kps_merged,
    translat_weights=t_weights,
    filter_ind=filter_ind
)

# red: aligned prediction, black: reference keypoints that were used
plot_image_with_kps(ref_img, [kabsch_kps, ref_kps_merged[filter_ind]], ['r', 'k'])

In [None]:
def get_similarity_w_flip(pred_kps, pred_scores, ref_target,
    min_score=0,
    filter_lr_confusion=False,
    occluded=False,
    threshold_pct= 0.3,
    translat_weights = None,
    kp_weights = None,
    ref_img = None):
    """Similarity between a predicted pose and one reference pose, allowing a horizontal flip.

    The prediction is scored against the reference annotation as given and
    against its horizontally flipped version; the better score is returned.

    Previously this function ignored pred_kps/pred_scores and read the
    notebook globals pred_kps_merged, pred_scores_merged, ref_img and flip
    instead (flip is never defined at notebook scope, so the call failed).

    Args:
        pred_kps: predicted keypoints before merge_head is applied.
        pred_scores: per-keypoint scores belonging to pred_kps.
        ref_target: target dict; ref_target['keypoints'][0] holds the
            reference keypoints as a tensor.
        min_score, filter_lr_confusion, occluded: forwarded to filter_kps.
        threshold_pct, kp_weights: forwarded to the similarity score.
        translat_weights: forwarded to the Kabsch alignment.
        ref_img: image belonging to ref_target; the flip transform is called
            with it.

    Returns:
        The larger of the unflipped and the flipped similarity score.

    Raises:
        ValueError: if ref_img is not given.
    """
    if ref_img is None:
        raise ValueError('ref_img is required to flip the reference annotations')

    pred_kps_merged = merge_head(pred_kps)
    pred_scores_merged = merge_head(pred_scores)

    # Read the unflipped annotation BEFORE flipping: the flip transform may modify
    # the target it is given (NOTE(review): confirm for references.transforms).
    ref_kps = ref_target['keypoints'][0].detach().numpy()
    ref_kps_merged = merge_head(ref_kps)

    # probability 1.0: the flip is always applied
    flip = RandomHorizontalFlip(1.0)
    _, ref_target_flipped = flip(ref_img, ref_target)
    ref_kps_flipped = ref_target_flipped['keypoints'][0].detach().numpy()
    ref_kps_flipped_merged = merge_head(ref_kps_flipped)

    # filter according to parameters
    filter_ind = filter_kps(pred_kps_merged, ref_kps_merged, pred_scores_merged,
        min_score= min_score,
        occluded = occluded,
        filter_lr_confusion = filter_lr_confusion
    )

    filter_ind_flipped = filter_kps(pred_kps_merged, ref_kps_flipped_merged, pred_scores_merged,
        min_score= min_score,
        occluded = occluded,
        filter_lr_confusion = filter_lr_confusion
    )

    score = kabsch_similarity_score(pred_kps_merged, ref_kps_merged,
        filter_ind=filter_ind,
        translat_weights=translat_weights,
        threshold_pct=threshold_pct,
        weights=kp_weights
    )

    score_flipped = kabsch_similarity_score(pred_kps_merged, ref_kps_flipped_merged,
        filter_ind=filter_ind_flipped,
        translat_weights=translat_weights,
        threshold_pct=threshold_pct,
        weights=kp_weights
    )

    # best of both orientations (the flipped score wins ties, as before)
    return score if score > score_flipped else score_flipped

In [None]:
# Plot the annotated reference frames start .. stop-1 to pick anchor poses by eye.
start = 40
stop = 61
#anchor_poses = [50, 52, 54, 56, 59, 62, 64, 66]
#anchor_poses_ind = [41, 44, 47, 50, 52, 53, 55, 56]
# all dataset indices in [start, stop)
anchor_poses_ind = [i for i in range(start,stop)]
#anchor_poses_ind = [40,43,45,47,48,50,52,54,57,58]
# TODO for clustering freestyle make legs negligible and maybe increase weight of arms
#for id in range(start, stop):
# note: the loop variable shadows the builtin id() for the rest of the notebook
for id in anchor_poses_ind:
    img_tensor, target = ref_dataset[id]
    # unmerged keypoints of the first annotated instance
    ref_kp = target['keypoints'][0].detach().numpy()
    plot_image_with_kps(img_tensor, [ref_kp])

In [None]:
# Hand-picked anchor poses: indices into ref_dataset.
anchor_ids = [40,43,45,47,48,50,52,54,57,58]
anchor_dataset = torch.utils.data.Subset(ref_dataset, anchor_ids)

cur_img, cur_target = test_dataset[10]
model.eval()
# Inference only: no_grad avoids recording an autograd graph for the forward pass
with torch.no_grad():
    prediction = model([cur_img])
# (the best detection was extracted twice here before; once is enough)
pred_box, pred_kps, pred_scores = get_max_prediction(prediction)
plot_image_with_kps(cur_img, [pred_kps[pred_scores>0]])

# 13-entry weight vectors, one entry per keypoint left after merge_head
t_weights = np.array([10, 5, 5, 3, 3, 1, 1, 10, 10, 3, 3, 1, 1])

kp_weights = np.array([3, 3, 3, 6, 6, 10, 10, 3, 3, 2, 2, 1, 1])

# num=len(anchor_dataset): rank ALL anchors. best_ind and scores are ordered best-first,
# flipped is in anchor order.
best_ind, scores, flipped = get_most_similar_ind_and_scores(pred_kps, pred_scores, anchor_dataset, num=len(anchor_dataset), filter_lr_confusion=True, occluded=True, translat_weights=t_weights, kp_weights=kp_weights)


In [None]:
# scores as returned: ordered from best to worst matching anchor
print(scores)
# argsort of the ranked anchor indices gives the permutation back to anchor order
score_ind = np.argsort(best_ind)
# flipped is already in anchor order (it is returned unsorted)
print(flipped)
scores_ordered = scores[score_ind]

# scores in anchor order, i.e. aligned with `flipped`
print(scores_ordered)

In [None]:
# Show the anchor images from best to worst match (no keypoints are drawn).
for i in best_ind:
    img, _= anchor_dataset[i]
    plot_image_with_kps(img, [])

# HMM


## Divide all poses from ref dataset into buckets each corresponding to 1 anchor frame

In [None]:
# Anchor poses (indices into ref_dataset); each one becomes a bucket / hidden state.
anchor_ids = np.array([40,43,45,47,48,50,52,54,57,58])
anchor_dataset = torch.utils.data.Subset(ref_dataset, anchor_ids)

# 13-entry weight vectors, one entry per keypoint left after merge_head
t_weights = np.array([10, 5, 5, 3, 3, 1, 1, 10, 10, 3, 3, 1, 1])
kp_weights = np.array([3, 3, 3, 6, 6, 10, 10, 3, 3, 2, 2, 1, 1])

# one (initially empty) list of dataset indices per anchor id
pose_buckets = {anchor_id: [] for anchor_id in anchor_ids}

for id, (img_tensor, ref_target) in enumerate(ref_dataset):
    ref_kps = ref_target['keypoints'][0].detach().numpy()

    # ground-truth keypoints have no prediction score: give every keypoint score 1
    ref_scores = np.ones(len(ref_kps), dtype=int)

    # num=1: only the single best matching anchor is needed
    best_ind, scores, flipped = get_most_similar_ind_and_scores(
        ref_kps, ref_scores, anchor_dataset,
        num=1,
        filter_lr_confusion=False,
        occluded=True,
        translat_weights=t_weights,
        kp_weights=kp_weights,
    )

    # best_ind indexes anchor_dataset; translate it back to the anchor's dataset id
    pose_buckets[anchor_ids[best_ind][0]].append(id)

In [None]:
# Number of reference poses assigned to every anchor.
for anchor_id, bucket in pose_buckets.items():
    print('{}: {} entries'.format(anchor_id, len(bucket)))

## plot a bucket for visual inspection

In [None]:
# Dataset indices assigned to the first anchor
bucket_ids = pose_buckets[anchor_ids[0]]

for id in bucket_ids:
    # get image
    img, target = ref_dataset[id]
    # unmerged keypoints of the first annotated instance
    kps = target['keypoints'][0].detach().numpy()

    plot_image_with_kps(img, [kps])

## Calculate emission and transition probabilities



In [None]:
# # emission probabilities
# # emission_prob = []
# # for bucket in pose_buckets.keys():
# #     emission_prob += [np.array([0]*len(pose_buckets.keys()))]

# transmission_prob = []
# for bucket in pose_buckets.keys():
#     transmission_prob += [np.array([0]*len(pose_buckets.keys()))]

# prev_id = None
# prev_anchor = None
# for id, (img,target) in tqdm(enumerate(ref_dataset)):
#     # make prediction of current
#     #prediction = model([img])
#     #pred_box, pred_kps, pred_scores = get_max_prediction(prediction)

#     # best_ind, scores, flipped = get_most_similar_ind_and_scores(pred_kps, pred_scores, anchor_dataset, num=1,  filter_lr_confusion=True,        occluded=False, translat_weights=t_weights, kp_weights=kp_weights)

#     #pred_bucket = best_ind[0]

#     # find gt bucket
#     gt_bucket = None
#     for gt_bucket_id, bucket in enumerate(pose_buckets.keys()):
#         if id in pose_buckets[bucket]:
#             gt_bucket = gt_bucket_id
#             break
    
#     cur_id = int(target['image_id'].item())

#     if prev_id is not None:
#         # current stride in dataset
#         if abs(cur_id - prev_id) <= 3:
#             transmission_prob[prev_anchor][gt_bucket] += 1
    

#     prev_id = cur_id
#     prev_anchor = gt_bucket

#     #emission_prob[gt_bucket][pred_bucket] += 1

# # normalize

# # for idx, bucket in enumerate(emission_prob):
# #     emission_prob[idx] = emission_prob[idx]/np.sum(emission_prob[idx])

# for idx, bucket in enumerate(transmission_prob):
#     transmission_prob[idx] = transmission_prob[idx]/np.sum(transmission_prob[idx])

In [None]:
# emission_prob is only assigned inside the commented-out estimation code above, so on a
# fresh kernel the bare print raised NameError and stopped "Run All".
if 'emission_prob' in globals():
    print(emission_prob)
else:
    print('emission_prob is not defined: the estimation cell above is commented out')

In [None]:
# transmission_prob is only assigned inside the commented-out estimation code above, so on a
# fresh kernel the bare print raised NameError and stopped "Run All".
if 'transmission_prob' in globals():
    print(transmission_prob)
else:
    print('transmission_prob is not defined: the estimation cell above is commented out')

In [None]:

def viterbi_path(prior, transmat, obslik, scaled=True, ret_loglik=False):
    '''Decode the most probable hidden-state sequence of an HMM (Viterbi algorithm).

    Notation:
        Z[t] := observation at time t
        Q[t] := hidden state at time t
    Inputs:
        prior: np.array(num_hid)
            prior[i] := Pr(Q[0] == i)
        transmat: np.ndarray((num_hid, num_hid))
            transmat[i, j] := Pr(Q[t+1] == j | Q[t] == i)
        obslik: np.ndarray((num_hid, num_obs))
            obslik[i, t] := Pr(Z[t] | Q[t] == i)
        scaled: bool
            normalise every trellis column to sum 1, which prevents underflow
            from repeatedly multiplying probabilities
        ret_loglik: bool
            additionally return a log-likelihood value: -sum(log(scale)) when
            scaled, otherwise the log of the final trellis entry of the path
    Outputs:
        path: np.array(num_obs) of int, path[t] := Q[t]
        (path, loglik) when ret_loglik is True
    '''
    n_states = obslik.shape[0]  # number of hidden states
    n_steps = obslik.shape[1]   # number of observations (time steps)

    # prob[i, t]: (optionally rescaled) probability of the best sequence ending in state i at t
    prob = np.zeros((n_states, n_steps))
    # back[i, t]: best predecessor of state i at time t (int: used as an index)
    back = np.zeros((n_states, n_steps), dtype=int)
    path = np.zeros(n_steps, dtype=int)

    prob[:, 0] = prior * obslik[:, 0]
    if scaled:
        scale = np.ones(n_steps)
        scale[0] = 1.0 / np.sum(prob[:, 0])
        prob[:, 0] *= scale[0]

    back[:, 0] = 0  # arbitrary: the first step has no predecessor
    state_range = np.arange(n_states)
    for t in range(1, n_steps):
        # candidates[i, j]: arrive in state j at time t coming from state i
        candidates = prob[:, t - 1][:, np.newaxis] * transmat[:, :n_states]
        back[:, t] = candidates.argmax(axis=0)
        prob[:, t] = candidates[back[:, t], state_range] * obslik[:, t]
        if scaled:
            scale[t] = 1.0 / np.sum(prob[:, t])
            prob[:, t] *= scale[t]

    # backtrack from the most probable final state
    path[-1] = prob[:, -1].argmax()
    for t in reversed(range(n_steps - 1)):
        path[t] = back[path[t + 1], t + 1]

    if not ret_loglik:
        return path

    if scaled:
        loglik = -np.sum(np.log(scale))
    else:
        loglik = np.log(prob[path[-1], -1])
    return path, loglik

In [None]:
# Sequence used for HMM decoding: recording freestyle_5 only (also part of test_dataset)
inference_dataset = PoseDataset([osp.join(project_root,'data/vzf/freestyle/freestyle_5')], train=False)

## For the inference dataset, store the sequence of ground-truth hidden states, predict the observed states, and then decode with Viterbi

In [None]:
# Walk through the first contiguous sequence of the inference dataset and collect, per frame,
#   hidden_states: best matching anchor for the ground-truth pose,
#   observations:  best matching anchor for the predicted pose,
#   obslik:        one column with the normalised similarity of the prediction to every anchor.
t_weights = np.array([10, 5, 5, 3, 3, 1, 1, 10, 10, 3, 3, 1, 1])

kp_weights = np.array([3, 3, 3, 6, 6, 10, 10, 3, 3, 2, 2, 1, 1])

hidden_states = []
observations = []

#inference_subset = torch.utils.data.Subset(ref_dataset, range(15,len(ref_dataset)))
inference_subset = inference_dataset

# Columns are collected in a list and stacked once after the loop; growing the matrix with
# np.append copied it on every frame. The per-frame print of the whole matrix was dropped,
# the final matrix is printed by the next cell.
obslik_columns = []
prev_id = None
# Inference only: no_grad avoids recording an autograd graph for every forward pass
with torch.no_grad():
    for img, target in tqdm(inference_subset):
        cur_id = int(target['image_id'].item())

        # a jump of more than 100 in the image id is treated as the end of the sequence
        if prev_id is not None and abs(cur_id - prev_id) > 100:
            break

        prev_id = cur_id

        ref_kps = target['keypoints'][0].detach().numpy()
        prediction = model([img])
        pred_box, pred_kps, pred_scores = get_max_prediction(prediction)

        plot_image_with_kps(img, [pred_kps[pred_scores>0]])

        # get gt hidden state (ground truth has no scores: use 1 for every keypoint)
        ref_scores = np.array([1] * len(ref_kps))
        best_ind, scores, flipped = get_most_similar_ind_and_scores(ref_kps, ref_scores, anchor_dataset, num=1,  filter_lr_confusion=False, occluded=True, translat_weights=t_weights, kp_weights=kp_weights)

        hidden_states.append(best_ind[0])

        # get observed state: rank ALL anchors so that every state gets a likelihood
        best_ind, scores, flipped = get_most_similar_ind_and_scores(pred_kps, pred_scores, anchor_dataset, num=len(anchor_ids),  filter_lr_confusion=False, occluded=False, translat_weights=t_weights, kp_weights=kp_weights)

        observations.append(best_ind[0])

        # back to anchor order, then sharpen the differences between the scores
        scores = np.power(np.array(scores)[np.argsort(best_ind)], 1.8)
        total = np.sum(scores)
        if total > 0:
            scores_norm = scores / total
        else:
            # No anchor matched at all: a uniform column carries no information and
            # avoids the NaNs a division by zero would feed into the Viterbi decoder.
            scores_norm = np.full(len(scores), 1.0 / len(scores))
        obslik_columns.append(scores_norm)

# shape (number of anchors, number of frames)
if obslik_columns:
    obslik = np.column_stack(obslik_columns)
else:
    obslik = np.zeros((len(anchor_ids), 0))

hidden_states = np.array(hidden_states)
observations = np.array(observations)


In [None]:
print(hidden_states)
print(observations)
print(obslik)

# Cyclic left-to-right transition matrix over the 10 anchor states:
# stay with 0.5, advance one state with 0.4, skip one state with 0.1 (wrapping around).
n_states = 10
transmat = np.zeros((n_states, n_states))
for state in range(n_states):
    transmat[state, state] = .5
    transmat[state, (state + 1) % n_states] = .4
    transmat[state, (state + 2) % n_states] = .1

observations2 = np.array([1, 2, 3, 4, 5, 5, 5, 3, 4, 2, 5])

In [None]:

# Most likely state sequence, decoded with a uniform prior (0.1 for each of the 10 states)
mls = viterbi_path(np.array([0.1]*10), transmat, obslik, scaled=True)

In [None]:
# decoded state index per frame
print(mls)

In [None]:
# Library version of the per-frame collection done manually above.
# NOTE(review): lib.matching.matching is already star-imported in the first cell, and the
# star imports may also provide functions that the definitions in this notebook shadow.
from lib.matching.matching import get_observation_likelihood_and_hidden_state

# returns three lists (presumably one entry per contiguous sequence - TODO confirm)
obslik_list, observations_list, hidden_state_list = get_observation_likelihood_and_hidden_state(model, inference_dataset, anchor_ids)

