In [1]:
import os
import random

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
# NTU RGB-D 120 data path: directory that holds the `*.skeleton.npy` samples used below.
# NOTE(review): this is an absolute, machine-specific path - adjust it for your environment.

data_dir = '/data/zak/graph/ntu/train'

* The train directory contains video samples from 120 action classes, around 114480 samples in total (some may be missing)
* Each file name is in the following format:
    - `S013C003P037R001A004.skeleton.npy`
    - S013 stands for **Setup Number 13**
    - C003 stands for **Camera Number 03**
    - P037 stands for **Participant Number 037**
    - R001 stands for **Replication Number** (001 or 002 only) — **TODO:** confirm what this means (presumably which of the two repetitions of the action this recording is)
    - A004 stands for **Action Class Number 004 (brush hair in this case)**

## Design data loader for the dataset

### A list of parameters that can be changed for the dataloader with their default values
- kp_shape = (25,3)
- seg_size = number of frames per segment (default 40 in `NTUDataset`); sample length varies with the action being performed, so one option is to select the minimum segment size among all samples in the dataset
    - min_seg_size in the dataset is 15, so a sample shorter than seg_size has to be padded by repeating its own earlier frames
- participant_list <= those who are in the train or validation or test set (a list of numbers/codes for the participants)
- data_path = '/data/zak/graph/ntu/train'
- BATCH_SIZE <== For the model
- temporal_aug_k <== Defines number of random samples from one segment (for temporal augmentation)

In [3]:
class NTUDataset(Dataset):
    """Map-style dataset over NTU RGB-D skeleton samples stored as ``.npy`` files.

    Every item is a pair ``(keypoints, action_class)``:

    * ``keypoints`` is a tensor holding exactly ``seg_size`` frames taken from
      the sample's ``'skel_body0'`` array (only the first body is used).
    * ``action_class`` is the integer that follows the ``A`` in the file name,
      used as-is (``...A004...`` -> ``4``).

    Args:
        data_path: Directory that contains the sample files.
        sample_set: Sequence of sample file names, relative to ``data_path``.
        kp_shape: Per-frame keypoint shape. Stored on the instance only; it is
            not used when loading samples.
        seg_size: Number of frames every returned segment has.
    """

    def __init__(self, data_path, sample_set, kp_shape=(25, 3), seg_size=40):
        # Initialize all parameters for the dataset
        self.sample_set = sample_set
        self.kp_shape = kp_shape
        self.seg_size = seg_size
        self.data_path = data_path

    def __len__(self):
        """Return the number of entries in ``sample_set``."""
        return len(self.sample_set)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` and return ``(keypoints, action_class)``."""
        sample_name = self.sample_set[idx]
        sample_path = os.path.join(self.data_path, sample_name)

        # Process the sample into tensor keypoints for the given index
        return self.read_sample(sample_path, sample_name)

    # ----- Helper functions -----
    def read_sample(self, sample_path, sample_name):
        """Read one ``.npy`` sample and return ``(keypoints, action_class)``.

        Args:
            sample_path: Full path of the file to load.
            sample_name: File name; the three characters after the first ``A``
                are parsed as the action class.
        """
        # SECURITY: allow_pickle=True unpickles the stored dict, which can run
        # arbitrary code - only load files from a trusted source.
        data = np.load(sample_path, allow_pickle=True).item()
        # Each data sample has the following keys:
        # dict_keys(['file_name', 'nbodys', 'njoints', 'skel_body0', 'rgb_body0', 'depth_body0', 'skel_body1', 'rgb_body1', 'depth_body1'])
        # For now only one participant per video segment is considered: 'skel_body0' is the input.
        kps = self.augment_kp(data['skel_body0'])
        action_class = int(sample_name.split('A')[1][:3])
        return kps, action_class

    def augment_kp(self, sample_kp):
        """Return a tensor with exactly ``seg_size`` frames taken from ``sample_kp``.

        Segments shorter than ``seg_size`` are padded with ``pad_frames``.
        Otherwise ``seg_size`` distinct frame indices are drawn with the
        ``random`` module and kept in chronological order, so the result can be
        made reproducible with ``random.seed``.

        Args:
            sample_kp: Array whose first axis indexes frames.
        """
        sample_size = sample_kp.shape[0]
        if sample_size < self.seg_size:
            # Repeat frames at the end in order to meet the segment size requirement
            return self.pad_frames(torch.tensor(sample_kp))
        frame_ids = sorted(random.sample(range(sample_size), self.seg_size))
        return torch.tensor(np.take(sample_kp, frame_ids, axis=0))

    def pad_frames(self, sample_kp):
        """Extend ``sample_kp`` to ``seg_size`` frames by repeating it from the start.

        Example: with ``seg_size == 40`` and a 15-frame segment the result is
        frames ``0..14``, ``0..14`` and ``0..9`` concatenated along axis 0.

        Args:
            sample_kp: Tensor whose first axis indexes frames.

        Returns:
            A tensor with ``seg_size`` frames. A segment that already has
            ``seg_size`` or more frames is returned unchanged.

        Raises:
            ValueError: If ``sample_kp`` has no frames (nothing to repeat).
        """
        sample_size = sample_kp.shape[0]
        if sample_size == 0:
            # Without this guard the padding could never reach seg_size.
            raise ValueError('Cannot pad a segment that contains no frames')

        additional_frames = self.seg_size - sample_size
        if additional_frames <= 0:
            return sample_kp

        # Whole repetitions of the segment plus a leading partial one.
        full_repeats, remainder = divmod(additional_frames, sample_size)
        pieces = [sample_kp] * (1 + full_repeats)
        if remainder:
            pieces.append(sample_kp[:remainder])
        return torch.cat(pieces, dim=0)
            
            
            

# Utility Functions for Dataloader

In [4]:
import os
import random

# samples file_name = 'S018C001P042R002A120.skeleton.npy'
# P042 is the participant number
# Remember that I am trying to split the dataset based on the participants and not the total samples
# This means that the validation set will have samples from all unique participants that are not involved in the train set

def get_participant_number(file_name):
    """Return the participant code of a sample file name as a string.

    The code is made of (at most) the first three characters that follow the
    first ``'P'`` in ``file_name``, e.g. ``'S018C001P042R002A120.skeleton.npy'``
    gives ``'042'``.
    """
    pieces = file_name.split('P')
    after_marker = pieces[1]
    return after_marker[:3]

def split_participants(data_path, val_pct=0.2):
    """Randomly split the participants found in ``data_path`` into train and validation lists.

    The participant code of every file in ``data_path`` is collected with
    ``get_participant_number``; the unique codes are shuffled with the
    ``random`` module and the first ``int(n * val_pct)`` of them become the
    validation participants. Seed ``random`` beforehand for a reproducible split.

    Args:
        data_path: Directory whose file names carry the participant codes.
        val_pct: Fraction of the participants assigned to the validation set.

    Returns:
        ``(train_participants, val_participants)``: two disjoint lists of
        participant codes.
    """
    samples = os.listdir(data_path)
    # Get all unique participant numbers. They are sorted because set iteration
    # order is not guaranteed to be stable across runs; a fixed starting order
    # lets the seed alone decide the split.
    all_participants = sorted({get_participant_number(sample) for sample in samples})

    # Split into train and val sets
    val_len = int(len(all_participants) * val_pct)
    # Shuffle the list itself: random.shuffle works in place, so shuffling a
    # temporary copy would leave the split order untouched.
    random.shuffle(all_participants)
    train_participants = all_participants[val_len:]
    val_participants = all_participants[:val_len]

    print(f'Total Video Samples: {len(samples)} || Total Participants: {len(all_participants)} || Train Participants: {len(train_participants)} || Validation Participants: {len(val_participants)}')
    return train_participants, val_participants

def get_train_val_set(data_path, val_pct=0.2, temporal_aug_k=3):
    """Build the train and validation lists of sample file names.

    Participants are split with ``split_participants``; every file in
    ``data_path`` then goes to the validation list when its participant is a
    validation participant and to the train list otherwise. Each file name is
    listed ``temporal_aug_k`` times in a row, so that a dataset drawing random
    frames per access sees ``temporal_aug_k`` random temporal crops of it.

    Args:
        data_path: Directory containing the sample files.
        val_pct: Fraction of participants used for validation.
        temporal_aug_k: How many times each file name is repeated.

    Returns:
        ``(train_samples, val_samples)``: two lists of file names.
    """
    train_participants, val_participants = split_participants(data_path, val_pct)
    train_samples = []
    val_samples = []
    # A one-off scan for the minimum segment size (loading each sample's
    # 'skel_body0' array and taking the smallest shape[0]) was done here before.
    for file_name in os.listdir(data_path):
        belongs_to_val = get_participant_number(file_name) in val_participants
        target = val_samples if belongs_to_val else train_samples
        # 'k' repeated entries stand for 'k' random temporal augmentations
        target.extend([file_name] * temporal_aug_k)

    return train_samples, val_samples

### Example to load the dataset

In [5]:
# Build the train/validation file lists, split by participant (20% of participants for validation).
# Each file name is repeated `temporal_aug_k` times (default 3), so the counts printed
# here are larger than the number of files on disk.
train_samples, val_samples = get_train_val_set(data_path=data_dir, val_pct=0.2)
print(f'Train samples: {len(train_samples)} || Validation samples: {len(val_samples)}')

Total Video Samples: 114460 || Total Participants: 106 || Train Participants: 85 || Validation Participants: 21
Train samples: 290466 || Validation samples: 52914


In [6]:
# Load train dataset
train_set = NTUDataset(data_path=data_dir, sample_set=train_samples)
val_set = NTUDataset(data_path=data_dir, sample_set=val_samples)

BATCH_SIZE = 8
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
# Pull a single batch from the train loader to sanity-check keypoints (x) and labels (y)
x, y = next(iter(train_loader))

In [12]:
# Keypoint batch shape; presumably (BATCH_SIZE, seg_size, *kp_shape) - compare with the output below
x.shape

torch.Size([8, 40, 25, 3])

In [13]:
# Action-class labels parsed from the 'A###' part of each file name, one per batch element.
# NOTE(review): labels are taken as-is from the name (A004 -> 4), i.e. not shifted to start at 0 -
# confirm this matches what the loss function expects.
y

tensor([76, 85, 72, 28, 85, 53,  5, 37])