In [1]:
import numpy as np

In [2]:
# read from pickle
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load files from a trusted source.
# NOTE(review): absolute, user-specific path; this cell will not run on another machine without editing it.
data = np.load("/home/u01163/3A/MotionBERT/data/action/ntu60_hrnet.pkl", allow_pickle=True)
# NOTE(review): displaying the whole dict produces a very large output; prefer inspecting keys or a single entry.
data

{'split': {'xsub_train': ['S001C001P001R001A001',
   'S001C001P001R001A002',
   'S001C001P001R001A003',
   'S001C001P001R001A004',
   'S001C001P001R001A005',
   'S001C001P001R001A006',
   'S001C001P001R001A007',
   'S001C001P001R001A008',
   'S001C001P001R001A009',
   'S001C001P001R001A010',
   'S001C001P001R001A011',
   'S001C001P001R001A012',
   'S001C001P001R001A013',
   'S001C001P001R001A014',
   'S001C001P001R001A015',
   'S001C001P001R001A016',
   'S001C001P001R001A017',
   'S001C001P001R001A018',
   'S001C001P001R001A019',
   'S001C001P001R001A020',
   'S001C001P001R001A021',
   'S001C001P001R001A022',
   'S001C001P001R001A023',
   'S001C001P001R001A024',
   'S001C001P001R001A025',
   'S001C001P001R001A026',
   'S001C001P001R001A027',
   'S001C001P001R001A028',
   'S001C001P001R001A029',
   'S001C001P001R001A030',
   'S001C001P001R001A031',
   'S001C001P001R001A032',
   'S001C001P001R001A033',
   'S001C001P001R001A034',
   'S001C001P001R001A035',
   'S001C001P001R001A036',
   'S

In [3]:
# Top-level keys of the loaded dataset dict.
data.keys()

dict_keys(['split', 'annotations'])

In [4]:
# Names of the available splits; each maps to a list of sample ids.
data["split"].keys()

dict_keys(['xsub_train', 'xsub_val', 'xview_train', 'xview_val'])

In [5]:
# Inspect one annotation record (frame_dir, label, image shape, frame count, keypoints, ...).
data["annotations"][0]

{'frame_dir': 'S001C001P001R001A001',
 'label': 0,
 'img_shape': (1080, 1920),
 'original_shape': (1080, 1920),
 'total_frames': 103,
 'keypoint': array([[[[1032. ,  334.8],
          [1041. ,  325.8],
          [1023.5,  325.8],
          ...,
          [1028. ,  611.5],
          [1063. ,  704. ],
          [1037. ,  695. ]],
 
         [[1032. ,  334. ],
          [1041. ,  325. ],
          [1023. ,  325. ],
          ...,
          [1027. ,  612.5],
          [1063. ,  707. ],
          [1036. ,  693.5]],
 
         [[1032. ,  334. ],
          [1041. ,  325. ],
          [1023. ,  325. ],
          ...,
          [1027. ,  612.5],
          [1063. ,  707. ],
          [1036. ,  698. ]],
 
         ...,
 
         [[1037. ,  321.8],
          [1050. ,  317.5],
          [1033. ,  313. ],
          ...,
          [1028. ,  612. ],
          [1064. ,  704. ],
          [1037. ,  695.5]],
 
         [[1039. ,  324. ],
          [1048. ,  315.2],
          [1035. ,  315.2],
          

In [6]:
import torch
import numpy as np
import os
import random
import copy
from torch.utils.data import Dataset, DataLoader
from lib.utils.utils_data import crop_scale, resample
from lib.utils.tools import read_pkl

def get_action_names(file_path = "data/action/ntu_actions.txt"):
    """Read action names from a numbered text file.

    Every non-blank line is split on ``'.'``; the second piece, minus its
    first character, is taken as the action name (so ``"1. drink water"``
    yields ``"drink water"``).

    Args:
        file_path: path of the text file to read.

    Returns:
        List of action names, in file order.

    Raises:
        IndexError: if a non-blank line contains no ``'.'``.
    """
    action_names = []
    # `with` guarantees the handle is closed, even if parsing raises.
    with open(file_path, "r") as f:
        for line in f.read().split('\n'):
            # A trailing newline (or any blank line) yields an empty entry
            # that has no '.' to split on; skip it instead of crashing.
            if not line.strip():
                continue
            action_names.append(line.split('.')[1][1:])
    return action_names

def make_cam(x, img_shape):
    '''
        Rescale pixel coordinates by the longer image side: x / side * 2 - 1.

        Input: x (M x T x V x C)
               img_shape (height, width)
        Output: array of the same shape as x. Both axes are divided by the
                same (longer) side, so the aspect ratio is preserved.
    '''
    height, width = img_shape
    longer_side = width if width >= height else height
    return x / longer_side * 2 - 1

def coco2h36m(x):
    '''
        Re-order COCO keypoints into the H36M joint layout.

        Input: x (M x T x V x C)
        Output: float array with the same shape as x; joints 0-16 are filled,
                any further joints stay zero.

        COCO: {0-nose 1-Leye 2-Reye 3-Lear 4Rear 5-Lsho 6-Rsho 7-Lelb 8-Relb 9-Lwri 10-Rwri 11-Lhip 12-Rhip 13-Lkne 14-Rkne 15-Lank 16-Rank}

        H36M:
        0: 'root'  (midpoint of the two hips)
        1: 'rhip'
        2: 'rkne'
        3: 'rank'
        4: 'lhip'
        5: 'lkne'
        6: 'lank'
        7: 'belly' (midpoint of root and neck)
        8: 'neck'  (midpoint of the two shoulders)
        9: 'nose'
        10: 'head' (midpoint of the two eyes)
        11: 'lsho'
        12: 'lelb'
        13: 'lwri'
        14: 'rsho'
        15: 'relb'
        16: 'rwri'
    '''
    out = np.zeros(x.shape)

    # Joints that are plain copies: H36M index <- COCO index.
    h36m_idx = [1, 2, 3, 4, 5, 6, 9, 11, 12, 13, 14, 15, 16]
    coco_idx = [12, 14, 16, 11, 13, 15, 0, 5, 7, 9, 6, 8, 10]
    out[:, :, h36m_idx, :] = x[:, :, coco_idx, :]

    # Joints synthesised as midpoints of two others.
    out[:, :, 0, :] = (x[:, :, 11, :] + x[:, :, 12, :]) * 0.5
    out[:, :, 8, :] = (x[:, :, 5, :] + x[:, :, 6, :]) * 0.5
    # Belly depends on root and neck, so it must be computed after both.
    out[:, :, 7, :] = (out[:, :, 0, :] + out[:, :, 8, :]) * 0.5
    out[:, :, 10, :] = (x[:, :, 1, :] + x[:, :, 2, :]) * 0.5
    return out

def random_move(data_numpy,
                angle_range=[-10., 10.],
                scale_range=[0.9, 1.1],
                transform_range=[-0.1, 0.1],
                move_time_candidate=[1]):
    """Apply a random, temporally smooth 2D rotation/scale/translation.

    The clip is cut into `move_time` segments (chosen at random from
    `move_time_candidate`). An angle, a scale and an x/y offset are drawn
    uniformly at every segment boundary and linearly interpolated in between,
    giving one similarity transform per frame. Only the first two channels
    (x, y) are transformed; remaining channels are passed through.

    Args:
        data_numpy: array laid out as (M, T, V, C).
        angle_range: [min, max] rotation in degrees.
        scale_range: [min, max] scale factor.
        transform_range: [min, max] translation added to x and to y.
        move_time_candidate: candidate numbers of segments.

    Returns:
        A new array in (M, T, V, C) layout. The input is NOT modified.
    """
    # np.transpose returns a view, so writing into it would silently alter the
    # caller's array (e.g. the dataset's stored motions, making augmentation
    # accumulate across accesses). Work on a private copy instead.
    data_numpy = np.transpose(np.array(data_numpy), (3,1,2,0)) # M,T,V,C-> C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    # Segment boundaries in frame indices, with T appended as the final one.
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)
    A = np.random.uniform(angle_range[0], angle_range[1], num_node)
    S = np.random.uniform(scale_range[0], scale_range[1], num_node)
    T_x = np.random.uniform(transform_range[0], transform_range[1], num_node)
    T_y = np.random.uniform(transform_range[0], transform_range[1], num_node)
    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)
    # Interpolate every parameter linearly between consecutive boundaries.
    for i in range(num_node - 1):
        seg_len = node[i + 1] - node[i]
        a[node[i]:node[i + 1]] = np.linspace(A[i], A[i + 1], seg_len) * np.pi / 180
        s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], seg_len)
        t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], seg_len)
        t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], seg_len)
    # One scaled 2x2 rotation matrix per frame, stacked as (2, 2, T).
    theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
                      [np.sin(a) * s, np.cos(a) * s]])
    # perform transformation
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
    data_numpy = np.transpose(data_numpy, (3,1,2,0)) # C,T,V,M -> M,T,V,C
    return data_numpy

def human_tracking(x):
    """Swap the first two person slots frame by frame based on motion cost.

    For every frame transition, person 0's displacement from its own previous
    frame is compared with its displacement from person 1's previous frame
    (summed joint distances). Each time the former is larger, a running
    "swapped" flag is toggled; frames where the flag is set have slots 0 and 1
    exchanged.

    Input: x (M x T x V x C). With M == 1 the input is returned unchanged.
    Output: new float array of the same shape. Only slots 0 and 1 are filled;
            any further person slots are left as zeros.
    """
    num_people, _ = x.shape[:2]
    if num_people == 1:
        return x

    first, second = x[0], x[1]
    # (T-1, V, C) -> per-joint distance (T-1, V) -> per-frame cost (T-1,)
    stay_cost = np.linalg.norm(first[1:] - first[:-1], axis=-1).sum(axis=-1)
    swap_cost = np.linalg.norm(first[1:] - second[:-1], axis=-1).sum(axis=-1)
    # Parity of the number of toggles so far: 1 means "currently swapped".
    swapped = (np.cumsum(stay_cost > swap_cost) % 2)[:, None, None]

    tracked = np.zeros(x.shape)
    # The first frame has no predecessor and is copied as is.
    tracked[0, 0] = first[0]
    tracked[1, 0] = second[0]
    tracked[0, 1:] = second[1:] * swapped + first[1:] * (1 - swapped)
    tracked[1, 1:] = first[1:] * swapped + second[1:] * (1 - swapped)
    return tracked

class ActionDataset(Dataset):
    """Base dataset that preloads 2D skeleton clips and labels from a pickle.

    The pickle is read with `read_pkl` and is expected to hold a dict with
    'split' (split name -> list of sample ids) and 'annotations' (list of
    per-sample dicts). Every selected annotation is resampled to `n_frames`,
    passed through `make_cam`, `human_tracking` and `coco2h36m`, and gets its
    'keypoint_score' appended as the last channel. Single-person clips are
    padded with an all-zero second person.

    Args:
        data_path: path of the pickle file.
        data_split: split name; must be a key of dataset['split'] when
            `check_split` is True.
        n_frames: target clip length passed to `resample`.
        random_move: stored on the instance for subclasses' __getitem__.
        scale_range: stored on the instance for subclasses' __getitem__.
        check_split: when False, every annotation is used and the dataset is
            treated as a training set.

    Attributes:
        motions: np.ndarray of the stacked float32 clips.
        labels: np.ndarray of the matching labels.

    Subclasses must implement __getitem__.
    """
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=True):   # data_split: train/test etc.
        # NOTE: reseeds NumPy's global RNG as a side effect.
        np.random.seed(0)
        dataset = read_pkl(data_path)
        if check_split:
            assert data_split in dataset['split'].keys()
            self.split = dataset['split'][data_split]
            # A set makes the per-annotation membership test O(1); testing
            # against the list would scan it once for every annotation.
            split_ids = set(self.split)
        annotations = dataset['annotations']
        self.random_move = random_move
        self.is_train = "train" in data_split or (check_split==False)
        if "oneshot" in data_split:
            self.is_train = False
        self.scale_range = scale_range
        motions = []
        labels = []
        for sample in annotations:
            if check_split and sample['frame_dir'] not in split_ids:
                continue
            # Random temporal resampling is only used for training data.
            resample_id = resample(ori_len=sample['total_frames'], target_len=n_frames, randomness=self.is_train)
            motion_cam = make_cam(x=sample['keypoint'], img_shape=sample['img_shape'])
            motion_cam = human_tracking(motion_cam)
            motion_cam = coco2h36m(motion_cam)
            motion_conf = sample['keypoint_score'][..., None]
            motion = np.concatenate((motion_cam[:,resample_id], motion_conf[:,resample_id]), axis=-1)
            if motion.shape[0]==1:                                  # Single person, make a fake zero person
                fake = np.zeros(motion.shape)
                motion = np.concatenate((motion, fake), axis=0)
            motions.append(motion.astype(np.float32))
            labels.append(sample['label'])
        self.motions = np.array(motions)
        self.labels = np.array(labels)

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.motions)

    def __getitem__(self, index):
        raise NotImplementedError

class NTURGBD(ActionDataset):
    """ActionDataset whose items are (augmented float32 clip, label) pairs."""

    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1]):
        super().__init__(data_path, data_split, n_frames, random_move, scale_range)

    def __getitem__(self, idx):
        """Return sample `idx` as (motion, label); motion is laid out (M,T,J,C).

        `random_move` is applied when self.random_move is truthy, then
        `crop_scale` when self.scale_range is truthy.
        """
        motion = self.motions[idx]
        label = self.labels[idx]
        if self.random_move:
            motion = random_move(motion)
        result = crop_scale(motion, scale_range=self.scale_range) if self.scale_range else motion
        return result.astype(np.float32), label

class NTURGBD1Shot(ActionDataset):
    """ActionDataset variant that drops 20 held-out classes and relabels the rest.

    Samples whose label is in `oneshot_classes` are removed after loading.
    The remaining 100 of the 120 class ids are renumbered 0..99 in ascending
    order of their original id.
    """
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=False):
        super(NTURGBD1Shot, self).__init__(data_path, data_split, n_frames, random_move, scale_range, check_split)
        oneshot_classes = [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114]
        # sorted() pins the old->new mapping; enumerating the bare set would
        # make the label ids depend on unspecified set iteration order.
        new_classes = sorted(set(range(120)) - set(oneshot_classes))
        old2new = {cid: i for i, cid in enumerate(new_classes)}
        # Boolean mask of the samples to keep (label not held out).
        keep = ~np.isin(self.labels, oneshot_classes)
        self.motions = self.motions[keep]
        # Labels are kept as a plain Python list of ints.
        self.labels = [old2new[x] for x in self.labels[keep]]

    def __getitem__(self, idx):
        'Generates one sample of data'
        motion, label = self.motions[idx], self.labels[idx] # (M,T,J,C)
        if self.random_move:
            motion = random_move(motion)
        if self.scale_range:
            result = crop_scale(motion, scale_range=self.scale_range)
        else:
            result = motion
        return result.astype(np.float32), label

In [8]:
# Build the dataset for the 'xsub_train' split (all clips are preprocessed eagerly in __init__).
# NOTE(review): the output recorded under this cell cannot come from an assignment; it looks stale.
ntu60_xsub_train = NTURGBD(data_path="/home/u01163/3A/MotionBERT/data/action/ntu60_hrnet.pkl", data_split='xsub_train', n_frames=243, random_move=True, scale_range=[1,1])

(array([[[[-0.03792745,  0.09796131,  0.93408203],
          [-0.15404803,  0.09179604,  0.9765625 ],
          [-0.07607412,  0.55088484,  0.9736328 ],
          ...,
          [-0.17249382, -0.53876054,  0.87597656],
          [-0.22722238, -0.28496003,  0.8857422 ],
          [-0.20265216, -0.18049628,  0.89208984]],
 
         [[-0.03785229,  0.09718049,  0.93408203],
          [-0.15396178,  0.09093308,  0.9765625 ],
          [-0.0763185 ,  0.550051  ,  0.9736328 ],
          ...,
          [-0.17195863, -0.53960043,  0.87597656],
          [-0.22686434, -0.28585362,  0.8857422 ],
          [-0.20236969, -0.18137789,  0.89208984]],
 
         [[-0.03777736,  0.09640074,  0.93408203],
          [-0.15387583,  0.0900712 ,  0.9765625 ],
          [-0.07656312,  0.54921806,  0.9736328 ],
          ...,
          [-0.17142367, -0.54043925,  0.87597656],
          [-0.22650653, -0.2867456 ,  0.8857422 ],
          [-0.20208752, -0.18225873,  0.89208984]],
 
         ...,
 
         [[-

In [24]:
# DataLoader settings for training; also reused further down for the AlphaPose dataset.
trainloader_params = {
        'batch_size': 32,
        'shuffle': True,
        'num_workers': 8,
        'pin_memory': True,
        'prefetch_factor': 4,
        'persistent_workers': True
}
train_loader = DataLoader(ntu60_xsub_train, **trainloader_params)

In [26]:
# Pull the first batch; `idx` is the batch index from enumerate (0 here).
idx, (batch_input, batch_label) = next(enumerate(train_loader))
# Expect (batch, persons, frames, joints, channels) for the input and (batch,) for the labels.
batch_input.shape, batch_label.shape

(torch.Size([32, 2, 243, 17, 3]), torch.Size([32]))

In [30]:
# Shape of a single sample inside the batch.
batch_input[0].shape

torch.Size([2, 243, 17, 3])

In [29]:
# Class ids of the (shuffled) batch.
batch_label

tensor([39, 22, 59, 32, 19, 17,  3, 21, 46, 29, 39, 14, 30,  7,  5, 50, 19, 16,
        38, 39,  8,  4, 41, 10, 13, 40, 10, 18, 28, 49, 51,  1])

In [9]:
# Index the dataset directly: returns a (motion, label) tuple.
sample = ntu60_xsub_train[0]
sample

(array([[[[-0.2091977 , -0.17620307,  0.93408203],
          [-0.3127736 , -0.1923492 ,  0.9765625 ],
          [-0.28481966,  0.22650135,  0.9736328 ],
          ...,
          [-0.27166396, -0.7595334 ,  0.87597656],
          [-0.34395093, -0.53692317,  0.8857422 ],
          [-0.33146703, -0.44099122,  0.89208984]],
 
         [[-0.2077905 , -0.17438895,  0.93408203],
          [-0.31129116, -0.1905458 ,  0.9765625 ],
          [-0.28344524,  0.22802055,  0.9736328 ],
          ...,
          [-0.27008998, -0.7573283 ,  0.87597656],
          [-0.34237397, -0.5348877 ,  0.8857422 ],
          [-0.32991904, -0.43901914,  0.89208984]],
 
         [[-0.2063834 , -0.17257231,  0.93408203],
          [-0.30980897, -0.18873984,  0.9765625 ],
          [-0.28207088,  0.22954226,  0.9736328 ],
          ...,
          [-0.26851642, -0.75512093,  0.87597656],
          [-0.34079742, -0.5328495 ,  0.8857422 ],
          [-0.32837135, -0.43704504,  0.89208984]],
 
         ...,
 
         [[ 

In [10]:
# Motion array shape: (persons, frames, joints, channels).
sample[0].shape

(2, 243, 17, 3)

In [15]:
# sample[0].shape = (2, 243, 17, 3)
# Second person slot; presumably the zero-padded fake person (confidence channel is 0) after augmentation. TODO confirm.
sample[0][1]

array([[[-0.6855411 ,  1.        ,  0.        ],
        [-0.6855411 ,  1.        ,  0.        ],
        [-0.6855411 ,  1.        ,  0.        ],
        ...,
        [-0.6855411 ,  1.        ,  0.        ],
        [-0.6855411 ,  1.        ,  0.        ],
        [-0.6855411 ,  1.        ,  0.        ]],

       [[-0.6842015 ,  1.        ,  0.        ],
        [-0.6842015 ,  1.        ,  0.        ],
        [-0.6842015 ,  1.        ,  0.        ],
        ...,
        [-0.6842015 ,  1.        ,  0.        ],
        [-0.6842015 ,  1.        ,  0.        ],
        [-0.6842015 ,  1.        ,  0.        ]],

       [[-0.6828615 ,  1.        ,  0.        ],
        [-0.6828615 ,  1.        ,  0.        ],
        [-0.6828615 ,  1.        ,  0.        ],
        ...,
        [-0.6828615 ,  1.        ,  0.        ],
        [-0.6828615 ,  1.        ,  0.        ],
        [-0.6828615 ,  1.        ,  0.        ]],

       ...,

       [[-0.34801465,  1.        ,  0.        ],
        [-0

In [16]:
# Label of the first sample.
sample[1]

0

In [22]:
# Print the first 1000 labels straight from the stored label array.
# Indexing the dataset itself would run the augmentation and cropping
# pipeline on every clip just to discard the motion.
for label in ntu60_xsub_train.labels[:1000]:
    print(label)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
5

In [37]:
json_alpha = [1001.677001953125, 456.9465637207031, 0.9723501205444336, 1010.5280151367188, 448.09552001953125, 0.9660100340843201, 992.8259887695312, 452.52105712890625, 0.9780716300010681, 1019.3790283203125, 456.9465637207031, 0.9493798017501831, 983.9749145507812, 456.9465637207031, 0.9490735530853271, 1045.93212890625, 514.478271484375, 0.947884738445282, 961.8473510742188, 518.90380859375, 0.9424760341644287, 1054.783203125, 580.8610229492188, 0.9525023698806763, 961.8473510742188, 585.2864990234375, 0.9528788328170776, 1063.6341552734375, 638.3927001953125, 0.9281706213951111, 957.4218139648438, 642.8182373046875, 0.9405717253684998, 1028.2301025390625, 642.8182373046875, 0.8878276944160461, 979.5494384765625, 642.8182373046875, 0.91289883852005, 1019.3790283203125, 744.6050415039062, 0.9656325578689575, 988.4004516601562, 744.6050415039062, 0.944460391998291, 1010.5280151367188, 833.1153564453125, 0.9190905690193176, 997.25146484375, 841.9664306640625, 0.9756958484649658, 1001.677001953125, 425.96795654296875, 0.9455900192260742, 1001.677001953125, 492.3506774902344, 0.9471628069877625, 1006.1025390625, 638.3927001953125, 0.9315934181213379, 1006.1025390625, 859.66845703125, 0.7868576645851135, 1001.677001953125, 864.093994140625, 0.9078227281570435, 1014.9535522460938, 859.66845703125, 0.8830437064170837, 992.8259887695312, 868.51953125, 0.9243642687797546, 1006.1025390625, 850.8174438476562, 0.7291236519813538, 1001.677001953125, 859.66845703125, 0.804639995098114]
# The literal above holds 78 values = 26 (x, y, score) triples; presumably one AlphaPose/Halpe-26 detection. TODO confirm.
# NOTE(review): rebinds `json_alpha` from a list to an ndarray; re-running this cell alone is harmless but the name changes kind.
json_alpha = np.array(json_alpha)

In [43]:
import json

def halpe2h36m(x):
    '''
        Re-order Halpe-26 keypoints into the 17-joint H36M layout.

        Input: x (T x V x C)
        Output: float array (T x 17 x C)

       //Halpe 26 body keypoints
    {0,  "Nose"},
    {1,  "LEye"},
    {2,  "REye"},
    {3,  "LEar"},
    {4,  "REar"},
    {5,  "LShoulder"},
    {6,  "RShoulder"},
    {7,  "LElbow"},
    {8,  "RElbow"},
    {9,  "LWrist"},
    {10, "RWrist"},
    {11, "LHip"},
    {12, "RHip"},
    {13, "LKnee"},
    {14, "Rknee"},
    {15, "LAnkle"},
    {16, "RAnkle"},
    {17,  "Head"},
    {18,  "Neck"},
    {19,  "Hip"},
    {20, "LBigToe"},
    {21, "RBigToe"},
    {22, "LSmallToe"},
    {23, "RSmallToe"},
    {24, "LHeel"},
    {25, "RHeel"},
    '''
    n_frames, _, n_channels = x.shape
    out = np.zeros([n_frames, 17, n_channels])

    # Joints that are plain copies: H36M index <- Halpe index.
    h36m_idx = [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    halpe_idx = [19, 12, 14, 16, 11, 13, 15, 18, 0, 17, 5, 7, 9, 6, 8, 10]
    out[:, h36m_idx, :] = x[:, halpe_idx, :]

    # H36M joint 7 has no Halpe counterpart: midpoint of Neck and Hip.
    out[:, 7, :] = (x[:, 18, :] + x[:, 19, :]) * 0.5
    return out

def read_input(json_path, vid_size, scale_range, focus):
    """Load per-frame keypoints from a JSON results file as an H36M clip.

    The file must contain a list of items, each with a flat 'keypoints' list
    of (x, y, score) triples and, when `focus` is used, an 'idx' field.

    Args:
        json_path: path of the JSON file.
        vid_size: (width, height) of the video, or a falsy value. When given,
            x/y are centred on the image and divided by half of the shorter
            side.
        scale_range: passed to `crop_scale` when truthy.
        focus: keep only items whose 'idx' equals this value; None keeps all.

    Returns:
        float32 array (T x 17 x 3). With neither `vid_size` nor `scale_range`
        the keypoints are returned in their original coordinates.

    Raises:
        ValueError: if no item is left after filtering.
    """
    with open(json_path, "r") as read_file:
        results = json.load(read_file)
    kpts_all = []
    for item in results:
        if focus is not None and item['idx'] != focus:
            continue
        kpts_all.append(np.array(item['keypoints']).reshape([-1,3]))
    if not kpts_all:
        # An empty list would otherwise fail later with an opaque unpacking
        # error inside halpe2h36m.
        raise ValueError("no keypoints found in %s for focus=%r" % (json_path, focus))
    kpts_all = halpe2h36m(np.array(kpts_all))
    # Default result, so `motion` is defined even when both `vid_size` and
    # `scale_range` are falsy.
    motion = kpts_all
    if vid_size:
        w, h = vid_size
        scale = min(w,h) / 2.0
        kpts_all[:,:,:2] = kpts_all[:,:,:2] - np.array([w, h]) / 2.0
        kpts_all[:,:,:2] = kpts_all[:,:,:2] / scale
    if scale_range:
        motion = crop_scale(kpts_all, scale_range)
    return motion.astype(np.float32)

In [44]:
import imageio

# Open the video only to read its metadata (frame rate and frame size).
# NOTE(review): the reader is never closed in the cells visible here; close it once it is no longer needed.
vid = imageio.get_reader("/home/u01163/3A/MotionBERT/data/dance1.mp4", 'ffmpeg')
fps_in = vid.get_meta_data()['fps']
vid_size = vid.get_meta_data()['size']
# Load the AlphaPose detections, normalised by the video size and then passed through crop_scale.
alpha_res = read_input("/home/u01163/3A/MotionBERT/data/alphapose-results.json", vid_size, scale_range=[1,1], focus=None)

In [47]:
def flip_data(data):
    """
    Mirror a pose sequence horizontally.
        data: [N, F, 17, D] or [F, 17, D]. X (horizontal coordinate) is the first channel in D.
    Return
        result: a deep copy with x negated and left/right joints exchanged;
                the input itself is left untouched.
    """
    left_joints = [4, 5, 6, 11, 12, 13]
    right_joints = [1, 2, 3, 14, 15, 16]
    mirrored = copy.deepcopy(data)
    # Negate the horizontal coordinate of every joint.
    mirrored[..., 0] *= -1
    # Exchange each left joint with its right counterpart in one assignment:
    # the right-hand side is gathered before anything is written.
    targets = left_joints + right_joints
    sources = right_joints + left_joints
    mirrored[..., targets, :] = mirrored[..., sources, :]
    return mirrored

In [48]:
# Flipping preserves the clip shape.
flip_data(alpha_res).shape

(44, 17, 3)

In [62]:
import math
class WildDetDataset(Dataset):
    """Serve one detection file as consecutive clips of at most `clip_len` frames.

    All frames are loaded once via `read_input`; item `i` is the slice
    [i * clip_len, (i + 1) * clip_len) of that array, so the last clip may be
    shorter than `clip_len`.
    """

    def __init__(self, json_path, clip_len=243, vid_size=None, scale_range=None, focus=None):
        self.json_path = json_path
        self.clip_len = clip_len
        self.vid_all = read_input(json_path, vid_size, scale_range, focus)

    def __len__(self):
        """Number of clips, counting a trailing partial clip."""
        total_frames = len(self.vid_all)
        return math.ceil(total_frames / self.clip_len)

    def __getitem__(self, index):
        """Return the frames belonging to clip `index`."""
        start = index * self.clip_len
        # Clamp the end so the final clip stops at the last available frame.
        stop = min(start + self.clip_len, len(self.vid_all))
        return self.vid_all[start:stop]

In [50]:
# Wrap the AlphaPose detections of the video as a clip dataset.
wild_det_ds = WildDetDataset(json_path="/home/u01163/3A/MotionBERT/data/alphapose-results.json", clip_len=243, vid_size=vid_size, scale_range=[1,1], focus=None)
wild_det_ds

<__main__.WildDetDataset at 0x7fd386433d50>

In [53]:
# The video has fewer frames than clip_len, so the only clip holds all of them.
wild_det_ds[0].shape

(44, 17, 3)

In [54]:
# DataLoader settings for inference: one clip per batch, original order, keep the last partial batch.
testloader_params = {
          'batch_size': 1,
          'shuffle': False,
          'num_workers': 8,
          'pin_memory': True,
          'prefetch_factor': 4,
          'persistent_workers': True,
          'drop_last': False
}

In [56]:
# Batch the wild-video clips with the inference settings.
test_loader = DataLoader(wild_det_ds, **testloader_params)


In [63]:
# NOTE(review): rebinds `sample`, which earlier held a (motion, label) tuple from ntu60_xsub_train.
sample = next(iter(test_loader))
sample.shape

torch.Size([1, 44, 17, 3])

In [60]:
# NOTE(review): executed out of order (In [60] vs In [63] above); the recorded (44, 17, 3) output does not match the batched `sample` assigned in the previous cell.
sample.shape

(44, 17, 3)

In [115]:
class AlphaPoseDataset(Dataset):
    """
    Dataset built from a list of AlphaPose JSON result files.

    Every file is loaded eagerly with `read_input` and paired with an all-zero
    "fake" second person.

    self.X: list of float32 arrays of shape (2, T, 17, 3), where T is the
            number of frames read from the corresponding file
    self.y: the labels, as passed in

    NOTE(review): `n_frames` and `check_split` are accepted but never used,
    and `random_move` is only stored on the instance; clips are neither
    resampled nor augmented here.
    """
    def __init__(self, json_paths, labels, n_frames=243, random_move=True, scale_range=[1,1], check_split=True):
        # NOTE: reseeds NumPy's global RNG as a side effect.
        np.random.seed(0)
        self.json_paths = json_paths
        self.y = labels
        self.random_move = random_move
        self.scale_range = scale_range
        self.X = []

        self._process_json()

    def __len__(self):
        """Number of samples, i.e. number of JSON paths."""
        return len(self.json_paths)

    def _process_json(self):
        """
        Load every JSON file and append its two-person clip to self.X.
        """
        for path in self.json_paths:
            real_person = np.array(read_input(path, vid_size=None, scale_range=self.scale_range, focus=None))
            fake_person = np.zeros(real_person.shape)
            # Stack along a new leading "person" axis: (2, T, 17, 3).
            both = np.stack([real_person, fake_person])
            self.X.append(both.astype(np.float32))

    def __getitem__(self, index):
        """
        Return the (clip, label) pair stored at `index`.
        self.X[index]: (2, T, 17, 3)
        self.y[index]: label as supplied by the caller
        """
        clip = self.X[index]
        label = self.y[index]
        return clip, label


In [116]:
# Toy dataset: the same two result files repeated 100 times (200 samples) with alternating labels.
json_paths = ["/home/u01163/3A/MotionBERT/data/alphapose-results1.json", "/home/u01163/3A/MotionBERT/data/alphapose-results2.json"]*100
labels = [0, 1]*100

# NOTE(review): each path is parsed again for every repetition, and n_frames / check_split are ignored by AlphaPoseDataset.
alphapose_dataset = AlphaPoseDataset(json_paths, labels, n_frames=243, random_move=True, scale_range=[1,1], check_split=True)

In [117]:
# Reuse the training DataLoader settings for the AlphaPose dataset.
dataloader = DataLoader(alphapose_dataset, **trainloader_params)

dataloader

<torch.utils.data.dataloader.DataLoader at 0x7fd38c15e110>

In [118]:
# First batch: a [clips, labels] pair produced by the default collate function.
alphapose_sample = next(iter(dataloader))

In [119]:
# NOTE(review): displays the whole batch; a shape or small slice is usually enough.
alphapose_sample

[tensor([[[[[ 0.1028, -0.0667,  0.9316],
            [ 0.0272, -0.0541,  0.9129],
            [ 0.0524,  0.2357,  0.9445],
            ...,
            [-0.0232, -0.4069,  0.9425],
            [-0.0232, -0.2179,  0.9529],
            [-0.0358, -0.0541,  0.9406]],
 
           [[ 0.1043, -0.0667,  0.9306],
            [ 0.0285, -0.0541,  0.9142],
            [ 0.0538,  0.2241,  0.9524],
            ...,
            [-0.0221, -0.4080,  0.9500],
            [-0.0221, -0.2184,  0.9577],
            [-0.0347, -0.0541,  0.9426]],
 
           [[ 0.1041, -0.0671,  0.9318],
            [ 0.0282, -0.0545,  0.9143],
            [ 0.0535,  0.2240,  0.9507],
            ...,
            [-0.0225, -0.4088,  0.9514],
            [-0.0225, -0.2190,  0.9555],
            [-0.0351, -0.0545,  0.9421]],
 
           ...,
 
           [[-0.0080,  0.2864,  0.9358],
            [-0.1621,  0.3249,  0.8805],
            [-0.1621,  0.9030,  0.9244],
            ...,
            [-0.2392, -0.4265,  0.9594],
   

In [120]:
# (batch, persons, frames, joints, channels); frames is the raw clip length, not n_frames.
alphapose_sample[0].shape

torch.Size([32, 2, 44, 17, 3])