# Data Loader

The following code draws random samples from the h5 dataset (see the previous notebook or the `make features` command) and validates them empirically.

It also contains some scrap code to efficiently generate batches.

In [1]:
%matplotlib notebook
import numpy as np
import h5py
import random

from matplotlib import animation

import os

from play_skeleton import *
from src.utils.joints import *

# Root folder of the NTU RGB+D data on the external drive.
ntu_path = "/media/gnocchi/Seagate Backup Plus Drive/NTU-RGB-D/"

# Sub-folders holding the raw RGB videos and the raw skeleton files.
rgb_folder = "nturgb+d_rgb/"
skeleton_folder = "nturgb+d_skeletons/"

# The h5 dataset and its log file sit in the same root folder as the raw data.
h5_dataset_path = ntu_path
dataset_file_name = "skeleton.h5"
log_file_name = "log.txt"

### Create list of samples from the log file (contains all sample names)

In [2]:
# Sample names in a list, one name per line of the log file (total of 56880 samples).
# The context manager closes the file handle as soon as the names have been read.
with open(h5_dataset_path + log_file_name) as log_file:
    samples_names_list = [line.rstrip('\n') for line in log_file]

# Take random elements from the list
# 1. Shuffle the names in place
random.shuffle(samples_names_list)

# 2. The first n names form the batch
n = 10
batch = samples_names_list[:n]

# 3. The remaining names are what is left once the batch has been removed
new_samples_names_list = samples_names_list[n:]

### Separate between cross-view and cross-subject sets

S : setup number 

C : camera id

P : performer id

R : replication number

A : action label

In [3]:
# Cross-subject protocol: performer ids that belong to the training split.
training_subjects = [
    1, 2, 4, 5, 8, 9, 13, 14, 15, 16,
    17, 18, 19, 25, 27, 28, 31, 34, 35, 38,
]

# Cross-view protocol: camera ids that belong to the training split.
training_cameras = [2, 3]

In [4]:
# Zero-padded performer tags for the training subjects: ['P001', 'P002', ..., 'P038']
training_subjects_pxxx = ["P{:03d}".format(subject_id) for subject_id in training_subjects]

# Zero-padded camera tags for the training cameras: ['C002', 'C003']
training_cameras_cxxx = ["C{:03d}".format(camera_id) for camera_id in training_cameras]
    

In [5]:
# Cross-subject training pool (40320 samples before the validation hold-out):
# keep every sample name that contains one of the training performer tags.
cs_training_samples_list = [
    name for name in samples_names_list
    if any(tag in name for tag in training_subjects_pxxx)
]

# Cross-subject testing pool (16560 samples): every remaining name.
# Note that this is a set, not a list, despite the variable name.
cs_testing_samples_list = set(samples_names_list).difference(cs_training_samples_list)

# Validation set (5% of the training set - 2016 samples): the size is fixed up front,
# then that many randomly chosen names are popped out of the training list.
cs_validation_count = int(0.05 * len(cs_training_samples_list))
cs_validation_samples_list = [
    cs_training_samples_list.pop(random.randrange(len(cs_training_samples_list)))
    for _ in range(cs_validation_count)
]
# assert not(any(item in cs_training_samples_list for item in cs_testing_samples_list))
# assert not(any(item in cs_training_samples_list for item in cs_validation_samples_list))


In [6]:
# Cross-view training pool (37920 samples before the validation hold-out):
# keep every sample name that contains one of the training camera tags.
cv_training_samples_list = [
    name for name in samples_names_list
    if any(tag in name for tag in training_cameras_cxxx)
]

# Cross-view testing pool (18960 samples): every remaining name.
# Note that this is a set, not a list, despite the variable name.
cv_testing_samples_list = set(samples_names_list).difference(cv_training_samples_list)

# Validation set (5% of the training set - 1896 samples): the size is fixed up front,
# then that many randomly chosen names are popped out of the training list.
cv_validation_count = int(0.05 * len(cv_training_samples_list))
cv_validation_samples_list = [
    cv_training_samples_list.pop(random.randrange(len(cv_training_samples_list)))
    for _ in range(cv_validation_count)
]
# assert not(any(item in cv_training_samples_list for item in cv_testing_samples_list))
# assert not(any(item in cv_training_samples_list for item in cv_validation_samples_list))



### Get class vector from sample list

In [7]:
# Shuffle the cross-view training names in place and keep the first ten as a mini-batch
random.shuffle(cv_training_samples_list)
mini_batch = cv_training_samples_list[:10]

# The class is read from the last three characters of each sample name
class_vector = [int(name[-3:]) for name in mini_batch]


## Random sample

skeleton : (3, max_frame, num_joint=25, 2) numpy array

hand_crops : (max_frame, n_hands = {2, 4}, crop_size, crop_size, 3)

In [8]:
# Sample names in a list, one name per line of the log file (total of 56880 samples).
# The context manager closes the file handle as soon as the names have been read.
with open(h5_dataset_path + log_file_name) as log_file:
    samples_names_list = [line.rstrip('\n') for line in log_file]

# Shuffle samples_names_list so that its first entry is a random sample
random.shuffle(samples_names_list)

# Take first random sample
sample_name = samples_names_list[0]

# List of samples (takes too long)
# a_group_key = list(dataset.keys())

# Read the skeleton fully into memory ([:]) while the file is open; the context
# manager closes the h5 file even when the sample lookup raises.
with h5py.File(h5_dataset_path + dataset_file_name, 'r') as dataset:
    skeleton = dataset[sample_name]["skeleton"][:]

In [9]:
# skeleton is (3, max_frame, 25, 2): move the last axis to the front and keep its
# first entry, leaving one (25, 3, max_frame) array to animate.
first_skeleton = skeleton.transpose(3, 2, 0, 1)[0]
animateJointCoordinates(first_skeleton, connexion_tuples)

<IPython.core.display.Javascript object>

## PyTorch data loader

### 1. Skeleton map example

In [83]:
from src.models.torch_dataset import *
from src.models.utils import *

h5_dataset_path = "/media/gnocchi/Seagate Backup Plus Drive/NTU-RGB-D/"

# Sample names in a list, one name per line of the log file (total of 56880 samples).
# The context manager closes the file handle as soon as the names have been read.
with open(h5_dataset_path + log_file_name) as log_file:
    samples_names_list = [line.rstrip('\n') for line in log_file]

# Fusion dataset (pose + cropped IR) built over every sample name, without augmentation
dataset = TorchDataset(model_type="FUSION",
                       use_pose=True,
                       use_ir=True,
                       use_cropped_IR=True,
                       data_path=h5_dataset_path,
                       sub_sequence_length=6,
                       normalize_skeleton=True,
                       normalization_type="1-COORD-SYS",
                       augment_data=False,
                       samples_names=samples_names_list)

# randrange excludes its upper bound. random.randint(0, len(...)) includes it and
# could therefore return an index one past the last sample name.
idx = random.randrange(len(samples_names_list))
sample, y = dataset[idx]
print(classes[y])

# First element of the sample is displayed as the skeleton map (cast to 8-bit)
skeleton_map = np.uint8(sample[0])

f = plt.figure()
ax = f.gca()
f.set_size_inches(10,10)

# Axis 0 is moved to the end so that imshow receives an (rows, cols, channels) image
ax.imshow(skeleton_map.transpose(1, 2, 0))
ax.set_xlabel("frame")
ax.set_ylabel("joint")
plt.show()

wear jacket


<IPython.core.display.Javascript object>

### 2. IR sequence example

In [84]:
# Second element of the sample is displayed as the IR sequence (cast to 8-bit)
ir_sequence = np.uint8(sample[1])

# One 10x10 inch figure per entry along the first axis; axis 0 of each entry is
# moved to the end before it is handed to imshow.
for ir_frame in ir_sequence:
    f = plt.figure()
    f.set_size_inches(10,10)
    ax = f.gca()
    plt.imshow(ir_frame.transpose(1, 2, 0))
    plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>