# Exploration of the .skeleton files provided in the NTU-RGB-D dataset

S : setup number 
C : camera id
P : performer id
R : replication number
A : action label

In [1]:
import numpy as np
import os
from joints import *
from utils import *
import random

import ffmpeg
from matplotlib import animation
import matplotlib.pyplot as plt
from matplotlib.patches import Circle

ntu_path = "/media/gnocchi/Seagate Backup Plus Drive/NTU-RGB-D/"

rgb_folder = "nturgb+d_rgb/"
skeleton_folder = "nturgb+d_skeletons/"

sample_name = os.path.splitext(random.choice(os.listdir(ntu_path + skeleton_folder)))[0]
print(sample_name)
# sample_name = "S016C003P008R002A059"
# sample_name = "S001C001P001R001A001"
# sample_name = "S002C003P007R001A060"
# sample_name = "S001C003P003R001A060"
sample_name = "S001C003P008R002A027"

S017C001P016R002A002


## Skeleton numpy array

read_xyz(...) returns a (3, max_frame, num_joint=25, 2) numpy array

read_color_xy(...) returns (2, max_frame, num_joint=25, 2) numpy array

In [2]:
skeleton = read_xyz(ntu_path + skeleton_folder + sample_name + ".skeleton") # shape (3, 103, 25, 2)

print("===== 3D skeleton =====")
print(skeleton[:, 0, :, 0])
print(skeleton.transpose(3, 2, 0, 1)[0].shape)

print("\r\n===== 2D RGB skeleton =====")
skeleton_2d = read_color_xy(ntu_path + skeleton_folder + sample_name + ".skeleton")
print(skeleton_2d[:, 0, :, 0])

has_2_subjects = np.any(skeleton_2d[:, :, :, 1])
print("Number of subjects : " + str(int(has_2_subjects) * 2))

===== 3D skeleton =====
[[-0.428 -0.465 -0.526 -0.515 -0.591 -0.645 -0.552 -0.529 -0.444 -0.465
  -0.48  -0.455 -0.463 -0.47  -0.606 -0.52  -0.396 -0.232 -0.37  -0.322
  -0.509 -0.491 -0.533 -0.444 -0.448]
 [-0.039  0.1    0.237  0.299  0.207  0.045  0.153  0.189  0.186  0.141
   0.293  0.298 -0.057 -0.091  0.049  0.007 -0.021 -0.075 -0.214 -0.165
   0.204  0.231  0.206  0.301  0.286]
 [ 4.159  4.074  3.969  4.006  3.97   4.038  3.974  3.954  4.074  3.935
   4.036  4.07   4.108  3.901  3.942  3.911  4.171  3.959  3.906  3.867
   3.999  3.94   3.935  4.108  4.067]]
(25, 3, 58)

===== 2D RGB skeleton =====
[[858.8273 847.0178 827.6458 831.7496 810.3558 798.7665 821.1577 826.6184
  852.5406 843.2859 842.0481 849.4076 848.6212 841.1816 805.4875 827.8658
  867.3713 906.4996 868.4897 880.7781 833.1916 836.3325 824.7604 853.0991
  851.0748]
 [549.4968 513.5264 476.3401 460.6867 484.63   528.1583 498.9532 489.132
  491.2635 501.7779 462.7869 462.1273 554.4424 564.3855 526.6187 537.8014
  544.

## Skeleton animation

In [3]:
%matplotlib notebook
import matplotlib.pyplot as plt
import time
import mpl_toolkits.mplot3d as plt3d

def animateJointCoordinates(joint_coordinates, connexion_tuples):
    '''
    joint_coordinates : shape(joints, 3, seq_len)
    
    
    '''
    fig = plt.figure()
    fig.set_size_inches(10,10)
    ax = plt.axes(projection='3d')
    
    plt.ion()
    fig.show()
    fig.canvas.draw()
    
    x = 0
    y = 2
    z = 1

    
    for t in range(joint_coordinates.shape[2]):
        ax.clear()
        ax.set_xlim3d(np.amin(joint_coordinates[:, x, :]), np.amax(joint_coordinates[:, x, :]))
        ax.set_ylim3d(np.amin(joint_coordinates[:, y, :]), np.amax(joint_coordinates[:, y, :]))
        ax.set_zlim3d(np.amin(joint_coordinates[:, z, :]), np.amax(joint_coordinates[:, z, :]))
        
        ax.scatter(joint_coordinates[:, x, t], joint_coordinates[:, y, t], joint_coordinates[:, z, t])
        
        
        line = plt3d.art3d.Line3D([0.1, 0.1], [0.1, 0.5], [0.1, 0.1])
        
        head_neck = plt3d.art3d.Line3D([joint_coordinates[0, x, t], joint_coordinates[1, x, t]], 
                                      [joint_coordinates[0, y, t], joint_coordinates[1, y, t]], 
                                      [joint_coordinates[0, z, t], joint_coordinates[1, z, t]])
        
        # ax.add_line(head_neck)
        
        for i in range(connexion_tuples.shape[0]):
            j1 = connexion_tuples[i, 0]
            j2 = connexion_tuples[i, 1]
            
            
            joint_line = plt3d.art3d.Line3D([joint_coordinates[j1, x, t], joint_coordinates[j2, x, t]], 
                                            [joint_coordinates[j1, y, t], joint_coordinates[j2, y, t]], 
                                            [joint_coordinates[j1, z, t], joint_coordinates[j2, z, t]])
            
            ax.add_line(joint_line)
        
        ax.view_init(10, 10)
        
        fig.canvas.draw()
        plt.pause(.001)
        # time.sleep(0.01)

In [4]:
# animateJointCoordinates(skeleton.transpose(3, 2, 0, 1)[0], connexion_tuples)

## Video playback with hand tracking for 1 or 2 subjects

Creates a video object of size (seq_len, 1080, 1920, 3)

In [5]:
probe = ffmpeg.probe(ntu_path + rgb_folder + sample_name + '_rgb.avi')
video_info = next(x for x in probe['streams'] if x['codec_type'] == 'video')
width = int(video_info['width'])
height = int(video_info['height'])
num_frames = int(video_info['nb_frames'])
print(num_frames)

58


In [6]:
out, err = (ffmpeg
           .input(ntu_path + rgb_folder + sample_name + '_rgb.avi')
           .output('pipe:', format='rawvideo', pix_fmt='rgb24')
           .run(capture_stdout=True)
)
video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
print(video.shape)

(58, 1080, 1920, 3)


In [7]:
f = plt.figure()
ax = f.gca()
f.set_size_inches(10,10)

image = plt.imshow(video[0], interpolation='None', animated = True)

# skeleton_2d shape (2{xy}, max_frame, num_joint=25, 2)
right_hand_s1 = Circle((skeleton_2d[0, 0, Joints.HANDRIGHT, 0], skeleton_2d[1, 0, Joints.HANDRIGHT, 0]),15, color="red")
left_hand_s1 = Circle((skeleton_2d[0, 0, Joints.HANDLEFT, 0], skeleton_2d[1, 0, Joints.HANDLEFT, 0]),15, color="red")

ax.add_patch(right_hand_s1)
ax.add_patch(left_hand_s1)

if has_2_subjects:
    right_hand_s2 = Circle((skeleton_2d[0, 0, Joints.HANDRIGHT, 1], skeleton_2d[1, 0, Joints.HANDRIGHT, 1]),15)
    left_hand_s2 = Circle((skeleton_2d[0, 0, Joints.HANDLEFT, 1], skeleton_2d[1, 0, Joints.HANDLEFT, 1]),15)
    
    ax.add_patch(right_hand_s2)
    ax.add_patch(left_hand_s2)



plt.axis('off')


def videoAnimation(frame_index):
    image.set_data(video[frame_index])
    [p.remove() for p in ax.patches]
    right_hand_s1 = Circle((skeleton_2d[0, frame_index, Joints.HANDRIGHT, 0], skeleton_2d[1, frame_index, Joints.HANDRIGHT, 0]),15, color='red')
    left_hand_s1 = Circle((skeleton_2d[0, frame_index, Joints.HANDLEFT, 0], skeleton_2d[1, frame_index, Joints.HANDLEFT, 0]),15, color='red')
    
    ax.add_patch(right_hand_s1)
    ax.add_patch(left_hand_s1)
    
    if has_2_subjects:
        right_hand_s2 = Circle((skeleton_2d[0, frame_index, Joints.HANDRIGHT, 1], skeleton_2d[1, frame_index, Joints.HANDRIGHT, 1]),15)
        left_hand_s2 = Circle((skeleton_2d[0, frame_index, Joints.HANDLEFT, 1], skeleton_2d[1, frame_index, Joints.HANDLEFT, 1]),15)

        ax.add_patch(right_hand_s2)
        ax.add_patch(left_hand_s2)
    
    return image,

ani = animation.FuncAnimation(f, videoAnimation, interval = 200, frames = num_frames, repeat = False)


<IPython.core.display.Javascript object>

## Crop around the hands

According to "Pose-conditioned Spatio-Temporal Attention for Human Action Recognition" https://arxiv.org/pdf/1703.10106.pdf, the crops are 50x50p around hands on NTU dataset. The same cropping dimensions are kept on latter paper STA-Hands by same author.


In [8]:
hand_crops = extract_hands(skeleton_2d, video, crop_size) # shape (n_frames, 4, crop_size, crop_size, 3)
if hand_crops.shape[1] == 2:
    pad = np.zeros(hand_crops.shape, dtype=hand_crops.dtype)
    hand_crops = np.concatenate((hand_crops, pad), axis = 1)


489
826
487
826
487
826
487
827
530
833
544
843
513
818
514
817
550
835
551
837
554
837
567
829
572
842
560
827
569
839
568
841
563
842
522
820
508
817
515
817
509
819
505
820
526
836
528
835
565
838
561
843
566
841
559
843
566
845
566
843
567
843
569
842
570
845
551
839
563
840
568
842
568
844
569
839
568
843
561
842
567
842
570
839
570
838
570
839
571
840
569
838
569
842
569
842
569
843
570
843
547
839
541
838
514
839
523
835
521
834
565
833
570
833
551
838
575
1064
575
1064
575
1064
575
1063
575
1064
574
1064
574
1063
573
1062
574
1061
574
1060
574
1061
573
1060
573
1061
573
1062
584
1064
600
1067
609
1072
616
1081
592
1106
553
1132
495
1141
429
1139
368
1126
296
1132
227
1131
155
1137
84
1134
21
1127
-8
1102


ValueError: could not broadcast input array from shape (0,50,3) into shape (50,50,3)

In [None]:
# Using video for variable from above of shape (seq_len, 1080, 1920, 3)
f, (axs) = plt.subplots(2, 2)
f.set_size_inches(8, 8)
axs[0, 0].axis('off')
axs[0, 1].axis('off')
axs[1, 0].axis('off')
axs[1, 1].axis('off')

s1_l = axs[0, 0].imshow(hand_crops[0, 0])
s1_r = axs[0, 1].imshow(hand_crops[0, 1])
s2_l = axs[1, 0].imshow(hand_crops[0, 2])
s2_r = axs[1, 1].imshow(hand_crops[0, 3])


def videoAnimation(frame_index):
    s1_l.set_data(hand_crops[frame_index, 0])
    s1_r.set_data(hand_crops[frame_index, 1])
    s2_l.set_data(hand_crops[frame_index, 2])
    s2_r.set_data(hand_crops[frame_index, 3])
    
    axs[0, 0].set_title(frame_index)
    axs[0, 1].set_title(frame_index)
    axs[1, 0].set_title(frame_index)
    axs[1, 1].set_title(frame_index)
    
    
    return s1_l

ani = animation.FuncAnimation(f, videoAnimation, interval = 200, frames = num_frames, repeat = False)