In [3]:
#####################################################################################
# Audio-driven upper-body motion synthesis on a humanoid robot
# Computer Science Tripos Part III Project
# Jan Ondras (jo356@cam.ac.uk), Trinity College, University of Cambridge
# 2017/18
#####################################################################################

#######################################################################################################
# Segment whole VID sequences (of audio and pose features) into shorter chunks; 
# For all subjects;
#     using sliding window with stride one frame
# Pad first segments ONLY for TEST realtime (RT)
# Data loaded according to dataset split
# Segments saved for training
# Also do target normalisation => DON'T DO AGAIN
# DONE: for AF_logFB26_norm & AF_MFCC13_norm
#######################################################################################################

import numpy as np
import time
import glob
from evalutils import norm_Y

# Audio feature (AF) type to segment; it names both the feature archive that is loaded
# and the key inside that archive. Switch by (un)commenting.
AF_type = 'AF_logFB26_norm'
# AF_type = 'AF_MFCC13_norm'

# Output folder that receives the segment archives for the chosen feature type
if AF_type == 'AF_logFB26_norm':
    SEG_folder = 'Segments_logFB26'
elif AF_type == 'AF_MFCC13_norm':
    SEG_folder = 'Segments_MFCC13'
else:
    # Fail fast: otherwise SEG_folder would be undefined and only surface later as a NameError
    raise ValueError("Unsupported AF_type: {!r}".format(AF_type))

# Dataset variant folder (located under ./../Dataset/); switch by (un)commenting.
TE_folder = 'TrainingExamples_16kHz'
# TE_folder = 'TrainingExamples'

# Single-argument print(...) behaves the same as a statement (Python 2) and as a function (Python 3)
print(AF_type + ' ' + SEG_folder)

SEGMENT_LEN = 300 # segment length in number of (audio/video) frames; frame rate 100Hz
FPS = 100.        # float, so that SEGMENT_LEN/FPS is a true division under Python 2 as well
print("Segment length: {:d} frames <=> {:.2f} seconds\n" .format(SEGMENT_LEN, SEGMENT_LEN/FPS))
#######################################################################################################

# Folder of the selected dataset variant; every input archive read below lives under it
te_dir = './../Dataset/'+TE_folder

# Video IDs (VIDs) as stored in the training-example archives
unique_srt_VIDs = np.load(te_dir+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load(te_dir+'/te_VIDs.npz')['VIDs']
# Subject IDs (SIDs): 5-character prefix of every second unique VID, e.g. ['PID02', 'PID05', ..
# NOTE(review): presumably each subject owns two consecutive VIDs (Task2, Task3) - confirm
unique_srt_SIDs = np.array([vid[:5] for vid in unique_srt_VIDs[::2]])

# Audio features (inputs X); the second axis is the feature dimension
AF = np.load(te_dir+'/te_'+AF_type+'.npz')[AF_type]
N_features = AF.shape[1]
# Pose features (targets Y); only the first 11 columns are used
PF = np.load(te_dir+'/te_PF_smooth_LPBF_4.0.npz')['PF_smooth_LPBF'][:, :11]
N_targets = PF.shape[1]

#######################
# Target (Y) normalisation, into range 0-1 according to constraints
# (norm_Y comes from evalutils; its exact mapping is not visible here)
PF = norm_Y(PF)
print("Targets (Y) are TRANSFORMED to 0-1 range")

#######################
# Load the dataset split: row masks, the VIDs of each subset and the per-VID counts
ds = np.load(te_dir+'/Dataset_split/split_masks_all.npz')
train_mask, val_mask, test_mask = ds['train_mask'], ds['val_mask'], ds['test_mask']
train_VIDs, val_VIDs, test_VIDs = ds['train_VIDs'], ds['val_VIDs'], ds['test_VIDs']

# Items joined by a single space, exactly as the multi-argument print statement emits them
print(' '.join(["Train VIDs:", str(train_VIDs)]))
print(' '.join(["Val VIDs:", str(val_VIDs)]))
print(' '.join(["Test VIDs:", str(test_VIDs), "\n"]))

# Per-VID counts; the segmentation code uses them as the number of frames of each VID
train_VIDs_sizes = ds['train_VIDs_ind_cnts']
val_VIDs_sizes   = ds['val_VIDs_ind_cnts']
test_VIDs_sizes  = ds['test_VIDs_ind_cnts']

# Split inputs and targets with the same masks; drop the full arrays as soon as they are split
X_train, X_val, X_test = AF[train_mask], AF[val_mask], AF[test_mask]
del AF
Y_train, Y_val, Y_test = PF[train_mask], PF[val_mask], PF[test_mask]
del PF

################################
# Segment training sequences
# Each training VID is cut into overlapping windows of SEGMENT_LEN frames with a stride of
# one frame and no padding, so a VID with n frames yields n - SEGMENT_LEN + 1 segments.
offset_f = 0 # frames:   index of the current VID's first frame in X_train / Y_train
offset_s = 0 # segments: index of the current VID's first segment in X / Y

# Array of number of segments per training VID; No padding.
# Clamped at 0: a VID shorter than SEGMENT_LEN cannot fill a single window, and a negative
# count would shrink the allocation and move offset_s backwards (overwriting earlier segments).
# NOTE(review): assumes train_VIDs_sizes has a signed integer dtype - confirm
N_VID_segments = np.maximum(train_VIDs_sizes - SEGMENT_LEN + 1, 0)

X = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_features))
Y = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_targets))

for N_VID_frames, N_segments in zip(train_VIDs_sizes, N_VID_segments):

    for j in range(N_segments):
        # Window j covers frames [j, j + SEGMENT_LEN) counted from the start of this VID
        X[offset_s+j] = X_train[offset_f+j:offset_f+j+SEGMENT_LEN]
        Y[offset_s+j] = Y_train[offset_f+j:offset_f+j+SEGMENT_LEN]

    # Advance to the next VID: frames always move on, segments only by what was written
    offset_f += N_VID_frames
    offset_s += N_segments

# The unsegmented training arrays are not used again in this cell
del X_train
del Y_train

np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_train.npz', X=X, Y=Y, 
         SEGMENT_LEN=SEGMENT_LEN, train_VIDs=train_VIDs, N_train_seg=N_VID_segments)
print("TRAIN TOTAL: {:d} segments.\n".format(len(X)))


################################
# Segment validation sequences
# Each validation VID is cut into overlapping windows of SEGMENT_LEN frames with a stride of
# one frame and no padding, so a VID with n frames yields n - SEGMENT_LEN + 1 segments.
offset_f = 0 # frames:   index of the current VID's first frame in X_val / Y_val
offset_s = 0 # segments: index of the current VID's first segment in X / Y

# Array of number of segments per validation VID; No padding.
# Clamped at 0: a VID shorter than SEGMENT_LEN cannot fill a single window, and a negative
# count would shrink the allocation and move offset_s backwards (overwriting earlier segments).
# NOTE(review): assumes val_VIDs_sizes has a signed integer dtype - confirm
N_VID_segments = np.maximum(val_VIDs_sizes - SEGMENT_LEN + 1, 0)

X = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_features))
Y = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_targets))

for N_VID_frames, N_segments in zip(val_VIDs_sizes, N_VID_segments):

    for j in range(N_segments):
        # Window j covers frames [j, j + SEGMENT_LEN) counted from the start of this VID
        X[offset_s+j] = X_val[offset_f+j:offset_f+j+SEGMENT_LEN]
        Y[offset_s+j] = Y_val[offset_f+j:offset_f+j+SEGMENT_LEN]

    # Advance to the next VID: frames always move on, segments only by what was written
    offset_f += N_VID_frames
    offset_s += N_segments

# The unsegmented validation arrays are not used again in this cell
del X_val
del Y_val

np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val.npz', X=X, Y=Y, 
         SEGMENT_LEN=SEGMENT_LEN, val_VIDs=val_VIDs, N_val_seg=N_VID_segments)
print("VAL TOTAL: {:d} segments.\n".format(len(X)))


################################
# Segment testing sequences
# Each testing VID is cut into overlapping windows of SEGMENT_LEN frames with a stride of
# one frame and no padding, so a VID with n frames yields n - SEGMENT_LEN + 1 segments.
offset_f = 0 # frames:   index of the current VID's first frame in X_test / Y_test
offset_s = 0 # segments: index of the current VID's first segment in X / Y

# Array of number of segments per testing VID; No padding.
# Clamped at 0: a VID shorter than SEGMENT_LEN cannot fill a single window, and a negative
# count would shrink the allocation and move offset_s backwards (overwriting earlier segments).
# NOTE(review): assumes test_VIDs_sizes has a signed integer dtype - confirm
N_VID_segments = np.maximum(test_VIDs_sizes - SEGMENT_LEN + 1, 0)

X = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_features))
Y = np.zeros((np.sum(N_VID_segments), SEGMENT_LEN, N_targets))

for N_VID_frames, N_segments in zip(test_VIDs_sizes, N_VID_segments):

    for j in range(N_segments):
        # Window j covers frames [j, j + SEGMENT_LEN) counted from the start of this VID
        X[offset_s+j] = X_test[offset_f+j:offset_f+j+SEGMENT_LEN]
        Y[offset_s+j] = Y_test[offset_f+j:offset_f+j+SEGMENT_LEN]

    # Advance to the next VID: frames always move on, segments only by what was written
    offset_f += N_VID_frames
    offset_s += N_segments

# X_test / Y_test are deliberately NOT deleted: the realtime (RT) test segmentation reads them again

np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test.npz', X=X, Y=Y, 
         SEGMENT_LEN=SEGMENT_LEN, test_VIDs=test_VIDs, N_test_seg=N_VID_segments)
print("TEST TOTAL: {:d} segments.\n".format(len(X)))


AF_logFB26_norm Segments_logFB26
Segment length: 300 frames <=> 3.00 seconds

Targets (Y) are TRANSFORMED to 0-1 range
Train VIDs: ['PID05Task2' 'PID05Task3' 'PID16Task2' 'PID16Task3' 'PID06Task2'
 'PID06Task3' 'PID09Task2' 'PID09Task3' 'PID22Task2' 'PID22Task3'
 'PID15Task2' 'PID15Task3' 'PID02Task2' 'PID02Task3' 'PID13Task2'
 'PID13Task3' 'PID21Task2' 'PID21Task3' 'PID26Task2' 'PID26Task3'
 'PID08Task2' 'PID08Task3' 'PID17Task2' 'PID17Task3' 'PID11Task2'
 'PID11Task3' 'PID10Task2' 'PID10Task3' 'PID24Task2' 'PID24Task3']
Val VIDs: ['PID25Task2' 'PID25Task3' 'PID20Task2' 'PID20Task3']
Test VIDs: ['PID18Task2' 'PID18Task3' 'PID23Task2' 'PID23Task3'] 

TRAIN TOTAL: 162086 segments.

VAL TOTAL: 20418 segments.

TEST TOTAL: 20194 segments.



In [4]:
# DONE
################################
# Segment testing sequences for realtime (RT) testing.
# One segment per frame (#segments=#frames): the segment of frame t ends at frame t of its
# VID and holds the SEGMENT_LEN most recent frames. Where a VID has fewer than SEGMENT_LEN
# frames up to t, the missing rows at the beginning of the segment stay zero (zero-padding).
offset = 0 # index of the current VID's first frame in X_test / Y_test (= its first segment in X / Y)

X = np.zeros((np.sum(test_VIDs_sizes), SEGMENT_LEN, N_features)) # #segments=#frames
Y = np.zeros((np.sum(test_VIDs_sizes), SEGMENT_LEN, N_targets))

for vid_len in test_VIDs_sizes:

    for t in range(vid_len):
        # First frame of the window, never reaching back before the start of this VID
        first = max(0, t - SEGMENT_LEN + 1)
        # Number of real (non-padding) frames in this segment: t + 1 at most SEGMENT_LEN
        n_real = t + 1 - first
        # Real frames are right-aligned; a full window (n_real == SEGMENT_LEN) fills every row
        X[offset+t, SEGMENT_LEN-n_real:] = X_test[offset+first:offset+t+1]
        Y[offset+t, SEGMENT_LEN-n_real:] = Y_test[offset+first:offset+t+1]

    offset += vid_len

# X_test / Y_test are left in memory (not deleted)

np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test_RT.npz', X=X, Y=Y, 
         SEGMENT_LEN=SEGMENT_LEN, test_VIDs=test_VIDs, N_test_RT_seg=test_VIDs_sizes)
print("Real-time: TEST TOTAL: {:d} segments.\n".format(len(X)))


Real-time: TEST TOTAL: 21390 segments.



In [2]:
# DONE; additionally
################################
# Segment validation sequences (for realtime testing: zero-pad segments at the beginning: #segments=#frames)

import numpy as np
import time
import glob
from evalutils import norm_Y

# Audio feature (AF) type to segment; it names both the feature archive that is loaded
# and the key inside that archive. Switch by (un)commenting.
AF_type = 'AF_logFB26_norm'
# AF_type = 'AF_MFCC13_norm'

# Output folder that receives the segment archives for the chosen feature type
if AF_type == 'AF_logFB26_norm':
    SEG_folder = 'Segments_logFB26'
elif AF_type == 'AF_MFCC13_norm':
    SEG_folder = 'Segments_MFCC13'
else:
    # Fail fast: otherwise SEG_folder would be undefined and only surface later as a NameError
    raise ValueError("Unsupported AF_type: {!r}".format(AF_type))

# Dataset variant folder (located under ./../Dataset/); switch by (un)commenting.
TE_folder = 'TrainingExamples_16kHz'
# TE_folder = 'TrainingExamples'

# Single-argument print(...) behaves the same as a statement (Python 2) and as a function (Python 3)
print(AF_type + ' ' + SEG_folder)

SEGMENT_LEN = 300 # segment length in number of (audio/video) frames; frame rate 100Hz
FPS = 100.        # float, so that SEGMENT_LEN/FPS is a true division under Python 2 as well
print("Segment length: {:d} frames <=> {:.2f} seconds\n" .format(SEGMENT_LEN, SEGMENT_LEN/FPS))
#######################################################################################################

# Folder of the selected dataset variant; every input archive read below lives under it
te_dir = './../Dataset/'+TE_folder

# Video IDs (VIDs) as stored in the training-example archives
unique_srt_VIDs = np.load(te_dir+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load(te_dir+'/te_VIDs.npz')['VIDs']
# Subject IDs (SIDs): 5-character prefix of every second unique VID, e.g. ['PID02', 'PID05', ..
# NOTE(review): presumably each subject owns two consecutive VIDs (Task2, Task3) - confirm
unique_srt_SIDs = np.array([vid[:5] for vid in unique_srt_VIDs[::2]])

# Audio features (inputs X); the second axis is the feature dimension
AF = np.load(te_dir+'/te_'+AF_type+'.npz')[AF_type]
N_features = AF.shape[1]
# Pose features (targets Y); only the first 11 columns are used
PF = np.load(te_dir+'/te_PF_smooth_LPBF_4.0.npz')['PF_smooth_LPBF'][:, :11]
N_targets = PF.shape[1]

#######################
# Target (Y) normalisation, into range 0-1 according to constraints
# (norm_Y comes from evalutils; its exact mapping is not visible here)
PF = norm_Y(PF)
print("Targets (Y) are TRANSFORMED to 0-1 range")

#######################
# Load the dataset split: row masks, the VIDs of each subset and the per-VID counts
ds = np.load(te_dir+'/Dataset_split/split_masks_all.npz')
train_mask, val_mask, test_mask = ds['train_mask'], ds['val_mask'], ds['test_mask']
train_VIDs, val_VIDs, test_VIDs = ds['train_VIDs'], ds['val_VIDs'], ds['test_VIDs']

# Items joined by a single space, exactly as the multi-argument print statement emits them
print(' '.join(["Train VIDs:", str(train_VIDs)]))
print(' '.join(["Val VIDs:", str(val_VIDs)]))
print(' '.join(["Test VIDs:", str(test_VIDs), "\n"]))

# Per-VID counts; the segmentation code uses them as the number of frames of each VID
train_VIDs_sizes = ds['train_VIDs_ind_cnts']
val_VIDs_sizes   = ds['val_VIDs_ind_cnts']
test_VIDs_sizes  = ds['test_VIDs_ind_cnts']

# Split inputs and targets with the same masks; drop the full arrays as soon as they are split
X_train, X_val, X_test = AF[train_mask], AF[val_mask], AF[test_mask]
del AF
Y_train, Y_val, Y_test = PF[train_mask], PF[val_mask], PF[test_mask]
del PF

############################################################################################
############################################################################################
############################################################################################
# Realtime (RT) segmentation of the validation sequences.
# One segment per frame (#segments=#frames): the segment of frame t ends at frame t of its
# VID and holds the SEGMENT_LEN most recent frames. Where a VID has fewer than SEGMENT_LEN
# frames up to t, the missing rows at the beginning of the segment stay zero (zero-padding).
offset = 0 # index of the current VID's first frame in X_val / Y_val (= its first segment in X / Y)

X = np.zeros((np.sum(val_VIDs_sizes), SEGMENT_LEN, N_features)) # #segments=#frames
Y = np.zeros((np.sum(val_VIDs_sizes), SEGMENT_LEN, N_targets))

for vid_len in val_VIDs_sizes:

    for t in range(vid_len):
        # First frame of the window, never reaching back before the start of this VID
        first = max(0, t - SEGMENT_LEN + 1)
        # Number of real (non-padding) frames in this segment: t + 1 at most SEGMENT_LEN
        n_real = t + 1 - first
        # Real frames are right-aligned; a full window (n_real == SEGMENT_LEN) fills every row
        X[offset+t, SEGMENT_LEN-n_real:] = X_val[offset+first:offset+t+1]
        Y[offset+t, SEGMENT_LEN-n_real:] = Y_val[offset+first:offset+t+1]

    offset += vid_len

np.savez('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val_RT.npz', X=X, Y=Y, 
         SEGMENT_LEN=SEGMENT_LEN, val_VIDs=val_VIDs, N_val_RT_seg=val_VIDs_sizes)
print("Real-time: VAL TOTAL: {:d} segments.\n".format(len(X)))


AF_logFB26_norm Segments_logFB26
Segment length: 300 frames <=> 3.00 seconds

Targets (Y) are TRANSFORMED to 0-1 range
Train VIDs: ['PID05Task2' 'PID05Task3' 'PID16Task2' 'PID16Task3' 'PID06Task2'
 'PID06Task3' 'PID09Task2' 'PID09Task3' 'PID22Task2' 'PID22Task3'
 'PID15Task2' 'PID15Task3' 'PID02Task2' 'PID02Task3' 'PID13Task2'
 'PID13Task3' 'PID21Task2' 'PID21Task3' 'PID26Task2' 'PID26Task3'
 'PID08Task2' 'PID08Task3' 'PID17Task2' 'PID17Task3' 'PID11Task2'
 'PID11Task3' 'PID10Task2' 'PID10Task3' 'PID24Task2' 'PID24Task3']
Val VIDs: ['PID25Task2' 'PID25Task3' 'PID20Task2' 'PID20Task3']
Test VIDs: ['PID18Task2' 'PID18Task3' 'PID23Task2' 'PID23Task3'] 

Real-time: VAL TOTAL: 21614 segments.

