## Loading and checking data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import json

%matplotlib inline

# Reading output of AlphaPose
# The parsed JSON is indexed like a list of per-detection records further down;
# printing the keys of the first record is a quick check of the record schema.
with open('3d_pose_extraction/raw_ilya_hannah_dyads.json') as f:
    output = json.load(f)
print(output[0].keys())

In [None]:
# Checking the information for every frame
# Count how many pose detections exist for each frame. The frame number is the
# integer found before the first '.' of `image_id` (presumably a file name).
poses_per_frame = {}
for frame in output:
    image_id = int(frame['image_id'].split('.')[0])
    # dict.get replaces the former bare `except:`, which would also have
    # swallowed unrelated errors (e.g. a typo or a KeyboardInterrupt).
    poses_per_frame[image_id] = poses_per_frame.get(image_id, 0) + 1

# Are there frames in which no one is identified?
# Compare against the full range between the first and last detected frame so
# that every frame of a multi-frame gap is reported, independent of record order.
if poses_per_frame:
    expected_frames = range(min(poses_per_frame), max(poses_per_frame) + 1)
    missing_frames = [k for k in expected_frames if k not in poses_per_frame]
else:
    missing_frames = []
print('Missing frames: {}\n'.format(missing_frames))

print('Possible number of people per frame: {}\n'.format(np.unique([*poses_per_frame.values()])))

# For some frames, either more than 2 people are identified or only 1 person is identified
less_instances = [k for k, v in poses_per_frame.items() if v < 2]
more_instances = [k for k, v in poses_per_frame.items() if v > 2]
print('Frames with 1 person: {}\n'.format(less_instances))
print('Frames with more than 2 people: {}'.format(more_instances))

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# Visualizing 3 dancers on a frame
fig = plt.figure(figsize=(12, 6))
axes = []
color = ['red', 'blue', 'green']
# Pairs of joint indices that are connected by a line when drawing a skeleton.
# NOTE(review): the pairs look like the SMPL 24-joint kinematic tree - confirm.
skeleton = [(0, 1), (0, 2), (0, 3), (1, 4), (2, 5), (3, 6), (4, 7), (5, 8), (6, 9), (7, 10),
            (8, 11), (9, 12), (9, 13), (9, 14), (12, 15), (13, 16), (14, 17), (16, 18), (17, 19),
            (18, 20), (19, 21), (20, 22), (21, 23)]

# Positions in `output` of the three detections to draw, one subplot each
indexes = [6, 7, 8]
for panel, record_index in enumerate(indexes):
    ax = fig.add_subplot(131 + panel, projection="3d")
    for set_limit in (ax.set_xlim, ax.set_ylim, ax.set_zlim):
        set_limit([-1, 1])
    axes.append(ax)

    joints = np.array(output[record_index]['pred_xyz_jts'])
    # Axis remapping used for display: column 2 -> plot x, column 0 -> plot y,
    # negated column 1 -> plot z
    plot_x, plot_y, plot_z = joints[:, 2], joints[:, 0], -joints[:, 1]
    ax.scatter(plot_x, plot_y, plot_z, color=color[panel])

    for joint_a, joint_b in skeleton:
        bone = [joint_a, joint_b]
        ax.plot(plot_x[bone], plot_y[bone], plot_z[bone], color='grey')

plt.tight_layout()
plt.show()

## Handling missing frames, single-person frames and frames with more than two people

In [None]:
# Processing frames with more or less than 2 people
# Frame numbers that need fixing (kept for inspection)
wrong_instances = sorted(set(more_instances).union(set(less_instances)))

# Group the detections by frame number instead of addressing them as
# `frame * 2 + j`. Positional arithmetic only lines up when frame ids start at 0
# and no frame is missing; grouping yields the same result in that case and
# stays correct otherwise. Dict insertion order keeps the original record order.
# Frames without any detection are absent from `output` and are not recreated.
detections_per_frame = {}
for detection in output:
    frame_id = int(detection['image_id'].split('.')[0])
    detections_per_frame.setdefault(frame_id, []).append(detection)

filtered_output = []
previous_pair = None  # the two (already cleaned) detections of the preceding frame
for frame_id, detections in detections_per_frame.items():
    # Filtering frames with more than 2 people
    if len(detections) > 2:
        scores = [detection['score'] for detection in detections]
        # Keep the two highest-scoring detections, in their original order
        keep = np.sort(np.argsort(scores)[-2:])
        detections = [detections[k] for k in keep]

    # Enriching frames with only 1 person
    elif len(detections) < 2:
        if previous_pair is None:
            # Previously the negative indices wrapped around and silently
            # borrowed a pose from the end of the recording.
            raise ValueError(
                'Frame {} has a single detection and there is no earlier frame '
                'to borrow the second dancer from'.format(frame_id))

        only_detection = detections[0]
        only_person = np.array(only_detection['pred_xyz_jts'])
        previous_first, previous_second = previous_pair

        # np.linalg.norm without `axis` already returns a single scalar
        distance_to_first = np.linalg.norm(only_person - np.array(previous_first['pred_xyz_jts']))
        distance_to_second = np.linalg.norm(only_person - np.array(previous_second['pred_xyz_jts']))

        if distance_to_second > distance_to_first:
            # The detection continues the first dancer: reuse the previous
            # pose of the second dancer to fill the gap.
            new_data = previous_second.copy()
            new_data['image_id'] = only_detection['image_id']
            detections = [only_detection, new_data]
        else:
            # The detection continues the second dancer: reuse the previous
            # pose of the first dancer to fill the gap.
            new_data = previous_first.copy()
            new_data['image_id'] = only_detection['image_id']
            detections = [new_data, only_detection]

    filtered_output.extend(detections)
    previous_pair = detections

# Sanity check: every frame should now hold exactly two detections
filtered_poses_per_frame = {}
for frame in filtered_output:
    image_id = int(frame['image_id'].split('.')[0])
    filtered_poses_per_frame[image_id] = filtered_poses_per_frame.get(image_id, 0) + 1
print('Possible number of people per frame: {}\n'.format(np.unique([*filtered_poses_per_frame.values()])))

## First visualizations

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

from matplotlib import rcParams
# Raise matplotlib's cap on the size of an animation embedded in HTML output
# (the value is expressed in MB according to the matplotlib rcParams reference),
# so that the to_jshtml() outputs below are not cut short.
rcParams['animation.embed_limit'] = 100

def animation(filtered_output, np_flag=False, interval=100):
    """Build a side-by-side 3D skeleton animation of the two dancers.

    Entries are read as alternating between the dancers: even positions are
    drawn in the left panel (red), odd positions in the right panel (blue).
    The bones drawn are taken from the module-level ``skeleton`` list.

    Parameters
    ----------
    filtered_output : sequence of dict or numpy.ndarray
        With ``np_flag=False``: records whose joints are stored under the
        ``'pred_xyz_jts'`` key. With ``np_flag=True``: an array indexed as
        ``[entry, joint, coordinate]``.
    np_flag : bool, optional
        Selects which of the two input layouts above is used.
    interval : int, optional
        Delay between frames, forwarded to ``FuncAnimation``.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        The animation; its figure is closed so the notebook does not also
        render it as a static image.

    Raises
    ------
    ValueError
        If the input does not contain at least one pose for each dancer.
    """
    if np_flag:
        person_1_poses = filtered_output[0::2, :, :]
        person_2_poses = filtered_output[1::2, :, :]
    else:
        entries = list(filtered_output)
        person_1_poses = np.array([entry['pred_xyz_jts'] for entry in entries[0::2]])
        person_2_poses = np.array([entry['pred_xyz_jts'] for entry in entries[1::2]])

    # With an odd number of entries the first dancer has one more pose than the
    # second; animate only the frames available for both to avoid an IndexError.
    n_frames = min(len(person_1_poses), len(person_2_poses))
    if n_frames == 0:
        raise ValueError('animation() needs at least one pose for each of the two dancers')

    def to_plot_axes(poses):
        # Same axis remapping as the static plot: column 2 -> x, column 0 -> y,
        # negated column 1 -> z
        return poses[:, :, 2], poses[:, :, 0], -poses[:, :, 1]

    fig = plt.figure(figsize=(12, 6))

    def make_panel(position, point_color):
        # One 3D subplot with an empty scatter and one empty line per bone
        ax = fig.add_subplot(position, projection="3d")
        ax.set_xlim([-1, 1])
        ax.set_ylim([-1, 1])
        ax.set_zlim([-1, 1])
        scatter = ax.scatter([], [], [], color=point_color)
        lines = [ax.plot([], [], [], 'gray')[0] for _ in skeleton]
        return scatter, lines

    scatt1, lines1 = make_panel(121, 'red')
    scatt2, lines2 = make_panel(122, 'blue')

    panels = [
        (scatt1, lines1, to_plot_axes(person_1_poses)),
        (scatt2, lines2, to_plot_axes(person_2_poses)),
    ]

    def update(frame):
        for scatter, lines, (xs, ys, zs) in panels:
            # The 3D scatter positions are updated through the private
            # `_offsets3d` attribute, as in the original implementation.
            scatter._offsets3d = (xs[frame], ys[frame], zs[frame])

            for line, (start, end) in zip(lines, skeleton):
                line.set_data([xs[frame, start], xs[frame, end]], [ys[frame, start], ys[frame, end]])
                line.set_3d_properties([zs[frame, start], zs[frame, end]])

        return scatt1, scatt2, *lines1, *lines2

    plt.close(fig)
    return FuncAnimation(fig, update, frames=range(n_frames), interval=interval, blit=False)

# Preview only the first 200 entries (two entries per frame) to keep the
# embedded animation small; the last expression renders it in the notebook.
ani = animation(filtered_output[:200])
HTML(ani.to_jshtml())

## Fixing dancer indices

In [None]:
# From the animation above, we can see the dancers sometimes exchange their positions
# This phenomenon, however, is not captured by the indices, as the simple check below shows
# `filtered_output` holds two consecutive entries per frame; every frame is
# visited, including the last one (the previous bound stopped one frame early).
for i in range(len(filtered_output) // 2):
    if filtered_output[i*2]['idx'] > filtered_output[i*2+1]['idx']:
        print('Captured Inversion!')

In [None]:
# Matching instances throughout the entirety of the video
# Walk the frames in order. The first entry of the previous frame is the
# reference dancer; whichever entry of the current frame lies closer to it
# must end up in the first slot.
frame_count = int(len(filtered_output)/2)
for frame_idx in range(1, frame_count):
    first_slot = frame_idx*2
    second_slot = first_slot + 1

    reference_pose = np.array(filtered_output[first_slot - 2]['pred_xyz_jts'])
    candidate_first = np.array(filtered_output[first_slot]['pred_xyz_jts'])
    candidate_second = np.array(filtered_output[second_slot]['pred_xyz_jts'])

    # np.linalg.norm without `axis` reduces the whole difference to one scalar
    gap_first = np.linalg.norm(reference_pose - candidate_first)
    gap_second = np.linalg.norm(reference_pose - candidate_second)

    if gap_first > gap_second:
        # Swap the two entries; the first slot receives a shallow copy of the
        # entry that used to be second.
        promoted = filtered_output[second_slot].copy()
        filtered_output[second_slot] = filtered_output[first_slot]
        filtered_output[first_slot] = promoted

        # Relabel the swapped pair so `idx` follows the slot order
        filtered_output[first_slot]['idx'] = 1
        filtered_output[second_slot]['idx'] = 2

# ani = animation(filtered_output)
# HTML(ani.to_jshtml())

## Smoothing data to handle jitter

In [None]:
# Smoothing data to handle the jitter
from scipy.signal import savgol_filter

# Separate the interleaved records into one array per dancer:
# even positions belong to the first dancer, odd positions to the second.
person_1_poses = np.array([record['pred_xyz_jts'] for record in filtered_output[0::2]])
person_2_poses = np.array([record['pred_xyz_jts'] for record in filtered_output[1::2]])

# Savitzky-Golay filter
def savgol_smoothness(poses, wl=12, po=3):
    """Smooth every joint trajectory over time with a Savitzky-Golay filter.

    Parameters
    ----------
    poses : array_like
        Pose sequence with time along axis 0 (indexed in this notebook as
        ``[frame, joint, coordinate]``).
    wl : int, optional
        ``window_length`` forwarded to ``scipy.signal.savgol_filter``.
        NOTE(review): older SciPy releases only accepted odd window lengths,
        so the default of 12 relies on a release that allows even ones - confirm.
    po : int, optional
        ``polyorder`` forwarded to ``scipy.signal.savgol_filter``.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``poses``.
    """
    # savgol_filter works along one axis of an N-d array, so a single call over
    # the time axis replaces the per-joint / per-coordinate loops. This also
    # lifts the former hard-coded limit of exactly 3 coordinates and no longer
    # truncates the result when the input has an integer dtype.
    return savgol_filter(poses, wl, po, axis=0)

smoothed_data_1 = savgol_smoothness(person_1_poses)
smoothed_data_2 = savgol_smoothness(person_2_poses)

# Re-interleave the two dancers (dancer 1 on even rows, dancer 2 on odd rows),
# the layout that animation(..., np_flag=True) reads.
interleaved_shape = (2*smoothed_data_1.shape[0],) + tuple(smoothed_data_1.shape[1:3])
interleaved_array = np.zeros(interleaved_shape)
interleaved_array[0::2] = smoothed_data_1
interleaved_array[1::2] = smoothed_data_2

# print("###############################################################################################")
# print("#################################### Savitzky-Golay Filter ####################################")
# print("###############################################################################################")
# ani = animation(interleaved_array, np_flag=True)
# HTML(ani.to_jshtml())

In [None]:
from scipy.fftpack import dct, idct

# Discrete cosine transform
def dct_smoothness(poses, threshold=0.25):
    """Low-pass every joint trajectory over time with a DCT.

    Each trajectory along axis 0 is transformed with an orthonormal DCT, the
    coefficients from position ``int(threshold * n_frames)`` onwards are set
    to zero, and the signal is transformed back.

    Parameters
    ----------
    poses : array_like
        Pose sequence with time along axis 0 (indexed in this notebook as
        ``[frame, joint, coordinate]``).
    threshold : float, optional
        Fraction of the lowest-frequency coefficients that is kept.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``poses``.
    """
    # dct/idct operate along one axis of an N-d array, so one call over the
    # time axis replaces the per-joint / per-coordinate loops. This also lifts
    # the former hard-coded limit of exactly 3 coordinates and no longer
    # truncates the result when the input has an integer dtype.
    frequency_data = dct(np.asarray(poses), axis=0, norm='ortho')

    cutoff = int(threshold * frequency_data.shape[0])
    frequency_data[cutoff:] = 0

    return idct(frequency_data, axis=0, norm='ortho')

# Smooth both dancers with the DCT low-pass and write the result into the
# existing `interleaved_array` in place (dancer 1 on even rows, dancer 2 on odd
# rows). This overwrites the values stored there by the Savitzky-Golay cell.
smoothed_data_1 = dct_smoothness(person_1_poses)
smoothed_data_2 = dct_smoothness(person_2_poses)
interleaved_array[0::2] = smoothed_data_1
interleaved_array[1::2] = smoothed_data_2

# print("###################################################################################################")
# print("#################################### Discrete Cosine Transform ####################################")
# print("###################################################################################################")
# ani = animation(interleaved_array, np_flag=True)
# HTML(ani.to_jshtml())

In [None]:
# Discrete cosine transform over time, all coordinates of a joint at once
def dct_3d_smoothness(poses, threshold=0.25):
    """Low-pass all coordinates of every joint over time with a DCT.

    Despite the name, the transform is one-dimensional: ``dct`` is applied
    along axis 0 (time) only, so each coordinate column is filtered
    independently. Coefficients from position ``int(threshold * n_frames)``
    onwards are set to zero before transforming back.

    Parameters
    ----------
    poses : array_like
        Pose sequence with time along axis 0 (indexed in this notebook as
        ``[frame, joint, coordinate]``).
    threshold : float, optional
        Fraction of the lowest-frequency coefficients that is kept.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``poses``.
    """
    # A single call along the time axis covers every joint, so the per-joint
    # loop is not needed. Returning the idct result directly (instead of
    # copying it into a zeros_like buffer) keeps the values from being
    # truncated when the input has an integer dtype.
    frequency_data = dct(np.asarray(poses), axis=0, norm='ortho')

    cutoff = int(threshold * frequency_data.shape[0])
    frequency_data[cutoff:] = 0

    return idct(frequency_data, axis=0, norm='ortho')
    
# Smooth both dancers with dct_3d_smoothness and write the result into the
# existing `interleaved_array` in place (dancer 1 on even rows, dancer 2 on odd
# rows), replacing whatever an earlier smoothing cell stored there.
smoothed_data_1 = dct_3d_smoothness(person_1_poses)
smoothed_data_2 = dct_3d_smoothness(person_2_poses)
interleaved_array[0::2] = smoothed_data_1
interleaved_array[1::2] = smoothed_data_2

# Banner for the cell output, then a preview of the first 200 rows
# (two rows per frame) of the smoothed poses.
print("######################################################################################################")
print("#################################### 3D Discrete Cosine Transform ####################################")
print("######################################################################################################")
ani = animation(interleaved_array[:200], np_flag=True)
HTML(ani.to_jshtml())

## Evaluating final poses

In [None]:
# Comparing the original video, the initial mesh reconstruction and the extracted pose after the full processing pipeline
from IPython.display import Video, display

video_1_path = './ilya_poses/0210-0220.mp4'
video_2_path = './3d_pose_extraction/vis/mesh_video.mp4'
# Video(video_path, width=640, height=480, embed=True)

# Both clips use the same <video> markup; only the source path differs
video_template = """
<video width="520" height="390" controls>
  <source src="{path}" type="video/mp4">
  Your browser does not support the video tag.
</video>
"""
video_1_html = video_template.format(path=video_1_path)
video_2_html = video_template.format(path=video_2_path)

# Animate the complete processed sequence
ani = animation(interleaved_array, np_flag=True, interval=35)
ani_html = ani.to_jshtml()

# Top row: the two videos side by side; below: the pose animation, centred
comparison_html = f"""
<div style="display: flex; justify-content: space-around; margin-bottom: 20px;">
    <div style="flex: 1; padding: 10px;">
        {video_1_html}
    </div>
    <div style="flex: 1; padding: 10px;">
        {video_2_html}
    </div>
</div>
<div style="text-align: center;">
    {ani_html}
</div>
"""
display(HTML(comparison_html))

## Saving data

In [None]:
# Saving processed data to use on the model
# np.save appends the '.npy' extension because the file name has none. When the
# cells are run top to bottom, `interleaved_array` holds the output of the last
# smoothing cell at this point.
np.save('../model/data/pose_extraction_img_9085', interleaved_array)