In [2]:
#####################################################################################
# Audio-driven upper-body motion synthesis on a humanoid robot
# Computer Science Tripos Part III Project
# Jan Ondras (jo356@cam.ac.uk), Trinity College, University of Cambridge
# 2017/18
#####################################################################################


#######################################################################################################
# Segment whole VID sequences (of audio and pose features) into shorter chunks; 
# For each subject (SID) separately;
#     using sliding window with stride one frame
# Zero-pad the first segments ONLY for the realtime (RT) validation and test sets
# Data loaded according to dataset split
# Segments saved for training
# Also do target normalisation => DON'T DO AGAIN
# DONE: for AF_logFB26_norm & AF_MFCC13_norm
#######################################################################################################

import numpy as np
import time
import glob
from evalutils import norm_Y

# Audio-feature type to segment; switch by (un)commenting one of the lines below.
AF_type = 'AF_logFB26_norm'
# AF_type = 'AF_MFCC13_norm'

# Output folder (under the training-examples folder) for each supported feature type.
_SEG_FOLDER_BY_AF_TYPE = {
    'AF_logFB26_norm': 'Segments_logFB26',
    'AF_MFCC13_norm': 'Segments_MFCC13',
}
# Fail right away on an unsupported feature type; otherwise SEG_folder would be
# undefined and the script would only crash when the first segments are saved.
if AF_type not in _SEG_FOLDER_BY_AF_TYPE:
    raise ValueError("Unsupported AF_type {!r}; expected one of {}".format(
        AF_type, sorted(_SEG_FOLDER_BY_AF_TYPE)))
SEG_folder = _SEG_FOLDER_BY_AF_TYPE[AF_type]

# Folder (under ./../Dataset/) holding the extracted training examples.
TE_folder = 'TrainingExamples_16kHz'
# TE_folder = 'TrainingExamples'

SEGMENT_LEN = 300 # segment length in number of (audio/video) frames; frame rate 100Hz
FPS = 100.
# Single-argument parenthesised print: emits the same text under Python 2 and 3.
print("Segment length: {:d} frames <=> {:.2f} seconds\n".format(SEGMENT_LEN, SEGMENT_LEN/FPS))
#######################################################################################################

def _load_npz_array(path, key):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as
    soon as the array has been read, instead of staying open for the rest of
    the session.
    """
    with np.load(path) as archive:
        return archive[key]


unique_srt_VIDs = _load_npz_array('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz', 'unique_srt_VIDs')
# NOTE(review): all_srt_VIDs is not used anywhere in the visible code; kept in
# case a later cell relies on it.
all_srt_VIDs = _load_npz_array('./../Dataset/'+TE_folder+'/te_VIDs.npz', 'VIDs')
# Subject ID = first 5 characters of a VID; only every second VID is taken,
# which assumes the VIDs are ordered with exactly two per subject - TODO confirm.
unique_srt_SIDs = np.array([vid[:5] for vid in unique_srt_VIDs[::2]]) # ['PID02', 'PID05', ..

# Audio features (inputs): one row per frame, one column per feature
AF = _load_npz_array('./../Dataset/'+TE_folder+'/te_'+AF_type+'.npz', AF_type)
N_features = AF.shape[1]
# Pose features (targets): only the first 11 columns are used
PF = _load_npz_array('./../Dataset/'+TE_folder+'/te_PF_smooth_LPBF_4.0.npz', 'PF_smooth_LPBF')
PF = PF[:, :11]
N_targets = PF.shape[1]

#######################
# Target (Y) normalisation, into range 0-1 according to constraints
# (done by evalutils.norm_Y, which is not visible here)
PF = norm_Y(PF)
print("Targets (Y) are TRANSFORMED to 0-1 range")

##############################################
# Iterate over all subjects
def _sliding_windows(seq, seg_len):
    """Cut `seq` into every full window of `seg_len` frames, with a stride of one frame.

    seq     : array indexed by frame along axis 0
    seg_len : window length in frames (expected to be >= 1)

    Returns an array of shape (n_windows, seg_len) + seq.shape[1:] with
    np.zeros' default dtype, where n_windows = len(seq) - seg_len + 1.
    A sequence shorter than one window yields zero windows instead of an error.
    """
    # The count is derived from the selected data itself, not from the mask
    # that produced it, so it is valid for index and boolean masks alike.
    n_windows = max(0, len(seq) - seg_len + 1)
    windows = np.zeros((n_windows, seg_len) + seq.shape[1:])
    for i in range(n_windows):
        windows[i] = seq[i:i + seg_len]
    return windows


def _realtime_windows(seq, seg_len):
    """Build one window per frame of `seq`, each ending at that frame.

    The first seg_len - 1 windows would start before the sequence does, so
    they are zero-padded at the beginning. This is the same as taking ordinary
    sliding windows over the sequence with seg_len - 1 zero frames prepended.

    Returns an array of shape (len(seq), seg_len) + seq.shape[1:].
    """
    padding = np.zeros((seg_len - 1,) + seq.shape[1:])
    return _sliding_windows(np.concatenate([padding, seq], axis=0), seg_len)


# (name used in the saved keys, key of the split mask, zero-padded realtime windows?)
_SEGMENT_SETS = (
    ('train',   'train_mask', False),
    ('val',     'val_mask',   False),
    ('val_RT',  'val_mask',   True),
    ('test',    'test_mask',  False),
    ('test_RT', 'test_mask',  True),
)

##############################################
# Iterate over all subjects
st = time.time()
for SID in unique_srt_SIDs:

    print(SID)

    # Per SID data: set name -> list with one entry per VID of this subject
    SID_X_seg = {name: [] for name, _, _ in _SEGMENT_SETS}
    SID_Y_seg = {name: [] for name, _, _ in _SEGMENT_SETS}
    # Number of segments contributed by each of the 2 VIDs of this subject
    SID_N_seg = {name: [] for name, _, _ in _SEGMENT_SETS}

    # Both VIDs corresponding to this SID
    for task in ['Task2', 'Task3']:

        VID = SID + task

        #######################
        # Load the dataset split (for this VID); the archive is closed once the masks are read
        with np.load('./../Dataset/'+TE_folder+'/Dataset_split/split_masks_' + VID + '.npz') as ds:
            masks = {key: ds[key] for key in ('train_mask', 'val_mask', 'test_mask')}

        ################################
        # Segment every set of this VID
        VID_arrays = {}
        for name, mask_key, realtime in _SEGMENT_SETS:
            segment = _realtime_windows if realtime else _sliding_windows
            X_seg = segment(AF[masks[mask_key]], SEGMENT_LEN)
            Y_seg = segment(PF[masks[mask_key]], SEGMENT_LEN)

            VID_arrays['X_' + name] = X_seg
            VID_arrays['Y_' + name] = Y_seg
            SID_X_seg[name].append(X_seg)
            SID_Y_seg[name].append(Y_seg)
            SID_N_seg[name].append(len(X_seg))

        ################################
        # Save per VID
        np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/perVID/seg_' + VID + '.npz',
                 SEGMENT_LEN=SEGMENT_LEN, **VID_arrays)

    # Join the segments of both VIDs and report the resulting shapes
    SID_arrays = {}
    for name, _, _ in _SEGMENT_SETS:
        X_all = np.concatenate(SID_X_seg[name], axis=0)
        Y_all = np.concatenate(SID_Y_seg[name], axis=0)

        SID_arrays['X_' + name] = X_all
        SID_arrays['Y_' + name] = Y_all
        SID_arrays['N_' + name + '_seg'] = SID_N_seg[name]
        print("\t{} {} {}".format(X_all.shape, Y_all.shape, SID_N_seg[name]))

    # Save per subject (SID)
    np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/perSID/seg_' + SID + '.npz',
             SEGMENT_LEN=SEGMENT_LEN, **SID_arrays)

    # Cumulative time since the start of the loop, in seconds and in minutes
    elapsed = time.time() - st
    print("\tTime taken:  {} {}".format(elapsed, elapsed/60.))

Segment length: 300 frames <=> 3.00 seconds

Targets (Y) are TRANSFORMED to 0-1 range
PID02
	(9230, 300, 26) (9230, 300, 11) [5042, 4188]
	(1507, 300, 26) (1507, 300, 11) [845, 662]
	(2105, 300, 26) (2105, 300, 11) [1144, 961]
	(1507, 300, 26) (1507, 300, 11) [845, 662]
	(2105, 300, 26) (2105, 300, 11) [1144, 961]
	Time taken:  62.1498968601 1.03583239714
PID05
	(10216, 300, 26) (10216, 300, 11) [5036, 5180]
	(1718, 300, 26) (1718, 300, 11) [843, 875]
	(2316, 300, 26) (2316, 300, 11) [1142, 1174]
	(1718, 300, 26) (1718, 300, 11) [843, 875]
	(2316, 300, 26) (2316, 300, 11) [1142, 1174]
	Time taken:  132.133527994 2.20222613017
PID06
	(7850, 300, 26) (7850, 300, 11) [3828, 4022]
	(1210, 300, 26) (1210, 300, 11) [584, 626]
	(1808, 300, 26) (1808, 300, 11) [883, 925]
	(1210, 300, 26) (1210, 300, 11) [584, 626]
	(1808, 300, 26) (1808, 300, 11) [883, 925]
	Time taken:  185.763033867 3.09605128368
PID08
	(8352, 300, 26) (8352, 300, 11) [4286, 4066]
	(1319, 300, 26) (1319, 300, 11) [683, 636]
