In [2]:
import json
import numpy as np
import os
import glob
import pickle

In [18]:
# Root folders of the training / validation JSON annotations.
# Each root is globbed one level deep for sub-folders, then for the files inside them.
train_path = './Training/DOG/train_jsons'
valid_path = './Validation/DOG/valid_jsons'

In [19]:
# Collect every entry directly under the training root and show them.
train_file_list = glob.glob(f'{train_path}/*')
print(train_file_list)

['./Training/DOG/train_jsons\\BODYLOWER', './Training/DOG/train_jsons\\BODYSCRATCH', './Training/DOG/train_jsons\\BODYSHAKE', './Training/DOG/train_jsons\\FEETUP', './Training/DOG/train_jsons\\FOOTUP', './Training/DOG/train_jsons\\HEADING', './Training/DOG/train_jsons\\LYING', './Training/DOG/train_jsons\\MOUNTING', './Training/DOG/train_jsons\\SIT', './Training/DOG/train_jsons\\TAILING', './Training/DOG/train_jsons\\TAILLOW', './Training/DOG/train_jsons\\TURN', './Training/DOG/train_jsons\\WALKRUN']


In [20]:
# Total number of videos (JSON files) in the train dataset.
# The counter is deliberately not called `sum`: that name would shadow the
# builtin sum() for every cell executed afterwards in the same kernel.
train_video_count = 0
for path in train_file_list:
    train_json_list = glob.glob(path + '/*')
    train_video_count += len(train_json_list)
print(train_video_count)

39537


In [36]:
# Total number of videos (JSON files) in the valid dataset.
# The counter is deliberately not called `sum`: that name would shadow the
# builtin sum() for every cell executed afterwards in the same kernel.
valid_video_count = 0
valid_file_list = glob.glob(valid_path + '/*')
for path in valid_file_list:
    valid_json_list = glob.glob(path + '/*')
    valid_video_count += len(valid_json_list)
print(valid_video_count)

4949


In [45]:
def getDataset(file_path, FRAME_LENGTH=100, NUMBER_OF_POINTS=30):
    """Load keypoint JSON files into fixed-length feature and label arrays.

    ``file_path`` is scanned two levels deep: every entry directly under it is
    treated as a folder, and every entry inside those folders is opened as a
    UTF-8 JSON file. Each JSON file contributes one sample.

    Args:
        file_path: Root directory to scan.
        FRAME_LENGTH: Number of frames kept per sample. Longer videos are
            truncated to their first ``FRAME_LENGTH`` frames; shorter ones are
            padded at the end with all-zero frames.
        NUMBER_OF_POINTS: Length of each zero padding frame. Every real frame
            holds two values (x, y) per keypoint entry, so this must equal
            twice the number of keypoint entries per frame for the result to
            be a rectangular array.

    Returns:
        A tuple ``(total_X, total_Y)`` of NumPy arrays. ``total_X`` holds the
        per-frame keypoint coordinates, x divided by the video width and y by
        the video height, with missing keypoints stored as ``0, 0``.
        ``total_Y`` holds the ``metadata.inspect.emotion`` value of each file,
        in the same order as ``total_X``.
    """
    total_X = []
    total_labels = []

    # glob returns entries in arbitrary order; sorting keeps the sample order
    # (and therefore the saved dataset) reproducible between runs.
    for class_dir in sorted(glob.glob(file_path + '/*')):
        for jsonname in sorted(glob.glob(class_dir + '/*')):
            # Only parsing needs the file handle, so release it right away.
            with open(jsonname, 'r', encoding='UTF-8') as json_file:
                json_data = json.load(json_file)

            metadata = json_data["metadata"]
            height = metadata["height"]
            width = metadata["width"]
            total_labels.append(metadata['inspect']['emotion'])

            video_kp_list = []
            for annotation in json_data["annotations"]:
                frame_kp_list = []
                # Keypoint ids are numeric strings; order them numerically so
                # that e.g. "10" comes after "9".
                ordered_keypoints = sorted(
                    annotation['keypoints'].items(),
                    key=lambda entry: int(entry[0]),
                )
                for _, point in ordered_keypoints:
                    if point is not None:
                        frame_kp_list.append(point['x'] / width)
                        frame_kp_list.append(point['y'] / height)
                    else:
                        # Missing keypoint: keep its slot, filled with zeros.
                        frame_kp_list.append(0)
                        frame_kp_list.append(0)
                video_kp_list.append(frame_kp_list)

            # Truncate or zero-pad to exactly FRAME_LENGTH frames. The caller's
            # FRAME_LENGTH / NUMBER_OF_POINTS are used as given (they used to be
            # overwritten with 100 / 30 here, which made the parameters inert).
            if len(video_kp_list) >= FRAME_LENGTH:
                video_kp_list = video_kp_list[:FRAME_LENGTH]
            else:
                missing_frames = FRAME_LENGTH - len(video_kp_list)
                video_kp_list.extend(
                    [0] * NUMBER_OF_POINTS for _ in range(missing_frames)
                )

            total_X.append(video_kp_list)

    total_X = np.array(total_X)
    total_Y = np.array(total_labels)
    return total_X, total_Y

In [38]:
# Build the training feature/label arrays from train_path and print their shapes as a sanity check.
train_X, train_Y = getDataset(file_path=train_path)
print(train_X.shape, train_Y.shape)

(39537, 100, 30) (39537,)


In [43]:
# Inspect two training samples (indices 5 and 6).
print(train_X[5:7])

[[[0.42638889 0.3828125  0.44861111 ... 0.         0.76111111 0.57265625]
  [0.41388889 0.4109375  0.42638889 ... 0.         0.77361111 0.58515625]
  [0.3875     0.4078125  0.41388889 ... 0.         0.7875     0.57265625]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.4015625  ... 0.99444444 0.41822917 0.75740741]
  [0.         0.         0.36614583 ... 0.97962963 0.365625   0.77685185]
  [0.         0.         0.30260417 ... 0.         0.4203125  0.84814815]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]]


In [42]:
# Inspect the first three training labels.
print(train_Y[:3])

['편안/안정' '편안/안정' '편안/안정']


In [46]:
# Build the validation feature/label arrays from valid_path and print their shapes as a sanity check.
valid_X, valid_Y = getDataset(file_path=valid_path)
print(valid_X.shape, valid_Y.shape)

(4949, 100, 30) (4949,)


In [47]:
# Inspect the first validation sample together with its label.
print(valid_X[0], valid_Y[0])

[[0.5125     0.67890625 0.54583333 ... 0.25859375 0.56666667 0.203125  ]
 [0.525      0.67578125 0.55416667 ... 0.2609375  0.46666667 0.1828125 ]
 [0.53194444 0.6828125  0.55694444 ... 0.2796875  0.29444444 0.178125  ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]] 불안/슬픔


In [58]:
# Save each split as an .npz archive, storing the features under key 'X' and the labels under key 'Y'.
np.savez('train_dataset.npz', X=train_X, Y=train_Y)
np.savez('valid_dataset.npz', X=valid_X, Y=valid_Y)

In [59]:
# Reopen the saved training archive and list the names of the arrays it contains.
data = np.load('train_dataset.npz')
for array_name in data.files:
    print(array_name)

X
Y


In [60]:
# Display the first sample of the array stored under key 'X' in the reloaded archive.
data['X'][0]

array([[0.45277778, 0.45546875, 0.46805556, ..., 0.        , 0.        ,
        0.        ],
       [0.45277778, 0.45546875, 0.46805556, ..., 0.        , 0.        ,
        0.        ],
       [0.45138889, 0.4609375 , 0.46805556, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [61]:
# Read both arrays back from the archive and print their shapes to compare with the shapes before saving.
test1 = data['X']
test2 = data['Y']
print(test1.shape, test2.shape)

(39537, 100, 30) (39537,)


(39537, 100, 30)