# Augmented Cosine Pose Similarity Metric (aCPSM)

In [None]:
# imports



import os
import mediapipe as mp
import cv
import numpy as np
from os import listdir
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2


## Extract pose coordinates of the focal points

In [None]:
# extract_coords returns the coordinates of the focal points. In this example, for detecting posture,
# we need the left/right shoulder coords and the left/right hip coords (these are the focal points)
def extract_coords(rgb_image, detection_result):
    """Return the [x, y] coordinates of the focal landmarks of every detected pose.

    The focal points are landmarks 11, 12, 23 and 24 of each pose (the
    shoulders and hips used for posture detection in this notebook).

    Args:
        rgb_image: The image the detection was run on. It is not used; the
            parameter is kept so existing callers keep working.
        detection_result: Object whose ``pose_landmarks`` attribute is a
            sequence of poses, each pose being an indexable sequence of
            landmarks that expose ``x`` and ``y``.

    Returns:
        A flat list of ``[x, y]`` pairs, four per detected pose, in the order
        11, 12, 23, 24. The list is empty when no pose was detected, so
        callers must check it before indexing.
    """
    focal_indices = (11, 12, 23, 24)

    # The landmarks already expose x/y, so they are read directly: no image
    # copy and no intermediate protobuf list are needed to extract them.
    return [
        [pose_landmarks[idx].x, pose_landmarks[idx].y]
        for pose_landmarks in detection_result.pose_landmarks
        for idx in focal_indices
    ]

## Calculating Benchmark Poses

In [None]:
# Calculating the benchmark poses is best explained by how they work.
# From the whole dataset, if you take 100 images of people in the left-tilt posture and average their
# shoulder and hip coordinates, the result is the average left-tilt posture, aka the left-tilt benchmark pose

            

model_path = 'pose_landmarker_lite.task'

base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)


##################
f = 'path/to/folder/for/benchmark/poses'  ## folder path

# Upper bound on the number of images averaged into one benchmark pose.
MAX_BENCHMARK_IMAGES = 100


def _benchmark_pose(class_dir, max_images=MAX_BENCHMARK_IMAGES):
    """Average the focal-point coordinates over the images in ``class_dir``.

    Args:
        class_dir: Folder holding the example images of one posture class.
        max_images: Stop after this many images have contributed.

    Returns:
        ``[[x, y], [x, y], [x, y], [x, y]]``: the mean coordinates of the four
        focal points, in the order returned by ``extract_coords``.

    Raises:
        ValueError: If no image in ``class_dir`` produced a detected pose.
    """
    # Fresh accumulators per call, so one class can never leak into the next.
    sums = [[0.0, 0.0] for _ in range(4)]
    used = 0

    for file_name in os.listdir(class_dir):
        image = mp.Image.create_from_file(class_dir + '/' + file_name)
        detection_result = detector.detect(image)
        p = extract_coords(image.numpy_view(), detection_result)

        # extract_coords returns an empty list when no pose was detected;
        # such an image cannot contribute to the average.
        if len(p) < 4:
            continue

        # zip stops after the four accumulators, i.e. only the first detected
        # pose of the image is used.
        for point_sum, point in zip(sums, p):
            point_sum[0] += point[0]
            point_sum[1] += point[1]

        used += 1
        if used == max_images:
            break

    if used == 0:
        raise ValueError(f'no pose could be extracted from any image in {class_dir}')

    # Divide by the number of images actually used, so folders holding fewer
    # than max_images still yield a valid benchmark.
    return [[sum_x / used, sum_y / used] for sum_x, sum_y in sums]


## BM = benchmark
## a process for this model
bm_poses = []

bm_left = _benchmark_pose(f + '/left')
print(f'left: {bm_left}')

bm_right = _benchmark_pose(f + '/right')
print(f'right: {bm_right}')

bm_straight = _benchmark_pose(f + '/straight')
print(f'straight: {bm_straight}')
        
        


# Helper functions

Functions used for calculating the pose similarity metric

In [None]:

# calculates the cosine distance (1 - cosine similarity) between two vectors
def cos_similarity(source_representation, test_representation):
    """Return the cosine distance between two vectors.

    The value is ``1 - cos(angle)``: vectors pointing the same way give 0,
    orthogonal vectors give 1, and opposite vectors give 2.

    Args:
        source_representation: First vector (array-like).
        test_representation: Second vector (array-like).

    Returns:
        ``1 - (source . test) / (|source| * |test|)``.
    """
    dot_product = np.matmul(np.transpose(source_representation), test_representation)
    source_norm = np.sqrt(np.sum(np.multiply(source_representation, source_representation)))
    test_norm = np.sqrt(np.sum(np.multiply(test_representation, test_representation)))
    cosine = dot_product / (source_norm * test_norm)
    return 1 - cosine

# bmp = benchmark pose, t_p = the given pose, whose classification is not known yet
# calculates the mean difference in x-coordinates between the first two points of
# the benchmark pose and the first two points of the given pose
def x_diff(bmp, t_p):
    """Return the mean signed x-offset between two poses.

    Only the first two points of each pose are compared, and only their
    x-coordinate (index 0 of each point).

    Args:
        bmp: Benchmark pose, a sequence of ``[x, y]`` points.
        t_p: Pose under test, in the same layout.

    Returns:
        The mean of ``bmp[k][0] - t_p[k][0]`` over ``k = 0, 1``.
    """
    n_points = 2
    offsets = (bmp[k][0] - t_p[k][0] for k in range(n_points))
    return sum(offsets) / n_points


def cosine_diff_conf(yhat, t_p):
    """Combine the model scores with the offset to each benchmark pose.

    For each class k (0 = left, 1 = right, 2 = straight) the result is
    ``(1 - |x_diff(benchmark_k, t_p)|) ** (yhat[k] + 0.5)``.

    Args:
        yhat: Scores of the stand-alone model, indexed left, right, straight.
        t_p: Pose under test, a sequence of ``[x, y]`` points.

    Returns:
        List of three fused scores in the order left, right, straight.
    """
    exp_bias = 0.5
    # Module-level benchmark poses, in the same order as the entries of yhat.
    benchmarks = (bm_left, bm_right, bm_straight)

    return [
        np.power(1 - np.abs(x_diff(benchmark, t_p)), yhat[k] + exp_bias)
        for k, benchmark in enumerate(benchmarks)
    ]


# Running the aCPSM

The following code block runs the aCPSM. This involves taking the testing images and using the stand-alone model to predict them \
for each of the three classifications. This prediction is denoted as yhat. Then the fused scores are calculated from the benchmark poses with the \
helper functions. Results are stored in the left, right and straight lists. For an average, these results need to be divided by the  
number of images of the respective test class.

In [None]:
# NOTE(review): this cell needs `cv2`, `tf` (TensorFlow) and a trained `model`
# in scope. None of them is defined in the visible part of this file (the
# imports cell has `import cv`, not `import cv2`) -- confirm they are provided
# elsewhere before running.
dir_name = 'path/for/testing'

# Winning scores of the stand-alone model, one entry per correctly
# classified test image of the class.
left = []
right = []
straight = []

# Winning fused scores, one entry per test image that the fused metric
# classifies correctly.
gamma_left = []
gamma_right = []
gamma_straight = []


def _evaluate_class(class_name, class_index, model_hits, gamma_hits, verbose=False):
    """Score every test image of one class with the model and the fused metric.

    Args:
        class_name: Sub-folder of ``dir_name`` holding the images of the class.
        class_index: Position of the class in the model output
            (0 = left, 1 = right, 2 = straight).
        model_hits: List that receives the model score of each image the
            stand-alone model classifies correctly.
        gamma_hits: List that receives the fused score of each image the
            fused metric classifies correctly.
        verbose: When true, print the raw model output and the fused scores
            of every image.
    """
    class_dir = dir_name + '/' + class_name

    for img_file in os.listdir(class_dir):
        img_path = class_dir + '/' + img_file

        imgt = cv2.imread(img_path)
        resize = tf.image.resize(imgt, (256,256))
        yhat = model.predict(np.expand_dims(resize/255, 0))
        if verbose:
            print(f'normal: {yhat[0]}')

        if yhat[0][class_index] == max(yhat[0]):
            model_hits.append(yhat[0][class_index])

        image = mp.Image.create_from_file(img_path)
        detection_result = detector.detect(image)

        t_p = extract_coords(image.numpy_view(), detection_result)

        # Without a detected pose there are no coordinates to compare with the
        # benchmark poses; the image then counts as a miss for the fused metric.
        if not t_p:
            continue

        y = cosine_diff_conf([yhat[0][0], yhat[0][1], yhat[0][2]], t_p)
        if verbose:
            print(f'gamma: {y}')

        if y[class_index] == max(y):
            gamma_hits.append(y[class_index])


_evaluate_class('left', 0, left, gamma_left)
_evaluate_class('right', 1, right, gamma_right)
# The per-image scores are printed for the straight class only.
_evaluate_class('straight', 2, straight, gamma_straight, verbose=True)


# Results

Some of the results are shown below.

The first three classifications are from the stand-alone model, such as the one from cnn-lstm.ipynb.\
The other three (PSM) classifications are from the pose similarity metric, which fuses the landmarks and the stand-alone model.\
The images used for these tests are sub-par: low quality and badly lit, hence the low accuracies. The PSM seems to be \
fixing them very well.

In [None]:
# Accuracy of a class = correctly classified images / test images of that
# class. The classes do not need to hold the same number of images, so the
# denominator is counted per class from the test folders instead of being one
# hard-coded number.
class_results = (
    ('left', left, gamma_left),
    ('right', right, gamma_right),
    ('straight', straight, gamma_straight),
)
n_images = {
    name: len(os.listdir(dir_name + '/' + name))
    for name, _, _ in class_results
}

# Stand-alone model accuracies.
for name, model_hits, _ in class_results:
    n = n_images[name]
    # An empty class folder has no defined accuracy; report nan instead of
    # failing with a division by zero.
    print(f'class acc ({name}): {len(model_hits) / n if n else float("nan")}')

# Accuracies of the fused pose similarity metric (PSM).
for name, _, gamma_hits in class_results:
    n = n_images[name]
    print(f'class PSM acc ({name}): {len(gamma_hits) / n if n else float("nan")}')




# class acc (left): 0.5714285714285714
# class acc (right): 0.9411764705882353
# class acc (straight): 0.0
# class PSM acc (left): 0.9523809523809523
# class PSM acc (right): 0.9411764705882353
# class PSM acc (straight): 0.8461538461538461

# class acc (left): 0.8095238095238095
# class acc (right): 0.8823529411764706
# class acc (straight): 0.23076923076923078
# class PSM acc (left): 0.8095238095238095
# class PSM acc (right): 0.7058823529411765
# class PSM acc (straight): 0.9230769230769231

# class acc (left): 0.6666666666666666
# class acc (right): 0.8823529411764706
# class acc (straight): 0.23076923076923078
# class PSM acc (left): 0.8095238095238095
# class PSM acc (right): 0.9411764705882353
# class PSM acc (straight): 0.8461538461538461

# class acc (left): 0.9523809523809523
# class acc (right): 0.9411764705882353
# class acc (straight): 0.15384615384615385
# class PSM acc (left): 0.9523809523809523
# class PSM acc (right): 0.9411764705882353
# class PSM acc (straight): 0.8461538461538461

## 