In [None]:
#####################################################################################
# Audio-driven upper-body motion synthesis on a humanoid robot
# Computer Science Tripos Part III Project
# Jan Ondras (jo356@cam.ac.uk), Trinity College, University of Cambridge
# 2017/18
#####################################################################################
# Training, validation and testing of the LSTM-SI model
# (for various dropout probabilities)
#####################################################################################

In [1]:
###############################################################################################################
# LSTM Training, cross-validation and testing
# SUBJECT INDEPENDENT
###############################################################################################################
# Load segmented data, already split
# Target angles are NOT rescaled to the range [0,1] here (already done when segmenting)
# Audio features are NOT z-normalised (subject-independently) here
###############################################################################################################

import numpy as np

AF_type = 'AF_logFB26_norm'
# AF_type = 'AF_MFCC13_norm'

if AF_type == 'AF_logFB26_norm':
    SEG_folder = 'Segments_logFB26'
elif AF_type == 'AF_MFCC13_norm':
    SEG_folder = 'Segments_MFCC13'

TE_folder = 'TrainingExamples_16kHz'

# Load segmented data, already split
X_train = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_train.npz')['X'] 
Y_train = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_train.npz')['Y'] 

X_val = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val.npz')['X'] 
Y_val = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val.npz')['Y'] 

X_val_RT = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val_RT.npz')['X'] 
Y_val_RT = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val_RT.npz')['Y'] 

X_test = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test.npz')['X'] 
Y_test = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test.npz')['Y'] 

X_test_RT = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test_RT.npz')['X'] 
Y_test_RT = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test_RT.npz')['Y'] 

train_VIDs = np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_train.npz')['train_VIDs']
val_VIDs =   np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val.npz')['val_VIDs']
test_VIDs =  np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test.npz')['test_VIDs']
print "Train VIDs:", train_VIDs
print "Val VIDs:", val_VIDs
print "Test VIDs:", test_VIDs, "\n"

N_train_seg =    np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_train.npz')['N_train_seg']
N_val_seg =      np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val.npz')['N_val_seg']
N_val_RT_seg =  np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_val_RT.npz')['N_val_RT_seg']
N_test_seg =     np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test.npz')['N_test_seg']
N_test_RT_seg =  np.load('./../Dataset/'+TE_folder+'/' + SEG_folder + '/seg_test_RT.npz')['N_test_RT_seg']

print "Data were loaded."

Train VIDs: ['PID05Task2' 'PID05Task3' 'PID16Task2' 'PID16Task3' 'PID06Task2'
 'PID06Task3' 'PID09Task2' 'PID09Task3' 'PID22Task2' 'PID22Task3'
 'PID15Task2' 'PID15Task3' 'PID02Task2' 'PID02Task3' 'PID13Task2'
 'PID13Task3' 'PID21Task2' 'PID21Task3' 'PID26Task2' 'PID26Task3'
 'PID08Task2' 'PID08Task3' 'PID17Task2' 'PID17Task3' 'PID11Task2'
 'PID11Task3' 'PID10Task2' 'PID10Task3' 'PID24Task2' 'PID24Task3']
Val VIDs: ['PID25Task2' 'PID25Task3' 'PID20Task2' 'PID20Task3']
Test VIDs: ['PID18Task2' 'PID18Task3' 'PID23Task2' 'PID23Task3'] 

Data were loaded.


In [None]:
######################################################################################################
# ARCHITECTURE TUNING
# TRAIN & VALIDATE
# DONE
######################################################################################################
# Sequence tagging approach (LSTMs)
 # if all feature values at a timestep are equal to mask_value=0., then the timestep is skipped

import time
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking, TimeDistributed, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

rand_seed = 37 
np.random.seed(rand_seed) # for reproducibility
   
model_checkpoint_path_prefix = './ModelCheckpoints/LSTM_SI/'
save_training_hist_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/trainHistES_' # ES = early stop

N_runs = 10
N_runs = 1
SEGMENT_LEN = 300 # == X_train.shape[1]
# useDropout = True
useDropout = False
# dropouts = [0.5, 0.5]
N_epochs = 100                    # this value is saved as N_epochs
train_batch_size = 15000 # same for all models; for LOGFB26 and also for MFCC13
N_features = X_train.shape[2]
N_targets  = Y_train.shape[2]

N_LSTM_units_range = [3, 6, 9, 12, 15, 18, 21, 24, 27]

st = time.time()
for N_LSTM_units in N_LSTM_units_range:

    print("Segment length: {:d};\n Train batch size: {:d};\n LSTM units: {:d};\n Max epochs: {:d};\n #features: {:d};\n #targets: {:d};\n #runs: {:d};"
          .format(SEGMENT_LEN, train_batch_size, N_LSTM_units, N_epochs, N_features, N_targets, N_runs))

    # SET MODEL TYPE
    model_type = '{:d}_{:d}_{:02d}'.format(N_runs, N_features, N_LSTM_units)
    if useDropout:
        model_type = model_type + '_DROP_{:.2f}_{:.2f}'.format(dropouts[0], dropouts[1])
    print "MODEL TYPE:\t\t", model_type

    # Create LSTM model
    model = Sequential()
    model.add( Masking(mask_value=0., input_shape=(SEGMENT_LEN, N_features)) )
    if useDropout:
        model.add( Dropout(dropouts[0]) )
    model.add( LSTM(N_LSTM_units, return_sequences=True) )
    if useDropout:
        model.add( Dropout(dropouts[1]) )    
    model.add( TimeDistributed(Dense(N_targets, activation='sigmoid')) )
    model.compile(loss='mean_squared_error', optimizer='adam')
    print model.summary()

    # Set early stopping
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Checkpoint model weights and the model itself: at each epoch
    #model_checkpoint_name = 'm_{epoch:04d}_{loss:.4f}_{val_loss:.4f}.hdf5'
    #model_checkpoint = ModelCheckpoint(model_checkpoint_path_prefix + model_checkpoint_name, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

    loss = []
    val_loss = []
    for i in range(N_runs):
        # Tain & validate
        hist = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=N_epochs, shuffle=True,  
                         batch_size=train_batch_size, verbose=1, 
                         #callbacks=[early_stop, model_checkpoint]
                         callbacks=[early_stop]
                        )
        loss.append( hist.history['loss'] )
        val_loss.append( hist.history['val_loss'] )
    # Save training history
    np.savez(save_training_hist_path_prefix + model_type + '.npz', 
            loss=loss, val_loss=val_loss, 
            N_params=model.count_params(), train_batch_size=train_batch_size, 
            N_runs=N_runs, N_LSTM_units=N_LSTM_units, N_epochs=N_epochs)
    print "Saved training history."
    print "\tTime taken: ", time.time()-st, (time.time()-st)/60. 

In [None]:
######################################################################################################
# TRAIN & SAVE best MODELS, also see effect of dropout; for best N_LSTM_units = 12
# 3 models
# DONE
######################################################################################################
# Sequence tagging approach (LSTMs)
 # if all feature values at a timestep are equal to mask_value=0., then the timestep is skipped
      
import time
import os
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking, TimeDistributed, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

rand_seed = 37 
np.random.seed(rand_seed) # for reproducibility

model_checkpoint_path_prefix = './ModelCheckpoints/LSTM_SI/'
save_training_hist_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/trainHistES_' # ES = early stop

N_runs = 1
SEGMENT_LEN = 300 # == X_train.shape[1]
dropouts_range = [[0., 0.], [0.25, 0.25], [0.5, 0.5]]
N_epochs = 100                    # this value is saved as N_epochs
train_batch_size = 15000 # same for all models; for LOGFB26 and also for MFCC13
N_features = X_train.shape[2]
N_targets  = Y_train.shape[2]

N_LSTM_units = 12 # BEST FOUND in LSTM_SI

st = time.time()

for dropouts in dropouts_range:
    
    if dropouts[0] == 0. and dropouts[1] == 0.:
        useDropout = False
    else:
        useDropout = True

    print("Segment length: {:d};\n Train batch size: {:d};\n LSTM units: {:d};\n Max epochs: {:d};\n #features: {:d};\n #targets: {:d};\n #runs: {:d};"
          .format(SEGMENT_LEN, train_batch_size, N_LSTM_units, N_epochs, N_features, N_targets, N_runs))

    # SET MODEL TYPE
    model_type = '{:d}_{:d}_{:02d}'.format(N_runs, N_features, N_LSTM_units)
    model_type = model_type + '_DROP_{:.2f}_{:.2f}'.format(dropouts[0], dropouts[1])
    print "MODEL TYPE:\t\t", model_type
    if not os.path.isdir(model_checkpoint_path_prefix + model_type):
        os.mkdir(model_checkpoint_path_prefix + model_type)

    # Create LSTM model
    model = Sequential()
    model.add( Masking(mask_value=0., input_shape=(SEGMENT_LEN, N_features)) )
    if useDropout:
        model.add( Dropout(dropouts[0]) )
    model.add( LSTM(N_LSTM_units, return_sequences=True) )
    if useDropout:
        model.add( Dropout(dropouts[1]) )    
    model.add( TimeDistributed(Dense(N_targets, activation='sigmoid')) )
    model.compile(loss='mean_squared_error', optimizer='adam')
    print model.summary()

    # Set early stopping
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Checkpoint model weights and the model itself: at each epoch
    model_checkpoint_name = 'm_{epoch:04d}_{loss:.4f}_{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_checkpoint_path_prefix + model_type + '/' + model_checkpoint_name, monitor='val_loss', verbose=1, 
                                       save_best_only=True, save_weights_only=False, mode='auto', period=1)

    loss = []
    val_loss = []
    for i in range(N_runs):
        # Tain & validate
        hist = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=N_epochs, shuffle=True,  
                         batch_size=train_batch_size, verbose=1, 
                         callbacks=[early_stop, model_checkpoint]
                        )
        loss.append( hist.history['loss'] )
        val_loss.append( hist.history['val_loss'] )
    # Save training history
    np.savez(save_training_hist_path_prefix + model_type + '.npz', 
            loss=loss, val_loss=val_loss, 
            N_params=model.count_params(), train_batch_size=train_batch_size, 
            N_runs=N_runs, N_LSTM_units=N_LSTM_units, N_epochs=N_epochs)
    print "Saved training history: ", save_training_hist_path_prefix + model_type + '.npz'
    print "Best model saved to: ", model_checkpoint_path_prefix + model_type + '/' + model_checkpoint_name
    print "\tTime taken: ", time.time()-st, (time.time()-st)/60.  

In [None]:
######################################################################################################
# Evaluate 3 models from above on VALIDATION & TEST SET;
# save results
# DONE
######################################################################################################
from postprocessingutils import save_predictions_and_eval
from keras.models import load_model
import glob

model_checkpoint_path_prefix = './ModelCheckpoints/LSTM_SI/'
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/'

N_runs = 1
SEGMENT_LEN = 300 # == X_train.shape[1]
dropouts_range = [[0., 0.], [0.25, 0.25], [0.5, 0.5]]
N_epochs = 100                    # this value is saved as N_epochs
train_batch_size = 15000 # same for all models; for LOGFB26 and also for MFCC13
N_features = X_train.shape[2]

N_LSTM_units = 12 # BEST FOUND in LSTM_SI

st = time.time()

for dropouts in dropouts_range:
    
    if dropouts[0] == 0. and dropouts[1] == 0.:
        useDropout = False
    else:
        useDropout = True

    # SET MODEL TYPE
    model_type = '{:d}_{:d}_{:02d}_DROP_{:.2f}_{:.2f}'.format(N_runs, N_features, N_LSTM_units, dropouts[0], dropouts[1])
    print "MODEL TYPE:\t\t", model_type
    
    #########################################
    # Load trained model
    test_model_name = sorted(glob.glob(model_checkpoint_path_prefix + model_type + '/m_*'))[-1]
    print "Loaded model:", test_model_name
    model = load_model( test_model_name )
    N_epochs = int( (test_model_name.split('/')[-1]).split('_')[1] )

    ###############################################################################################################
    # Evaluate on validation set
    # ONLINE TESTING (as if new timesteps arrive one-by-one)
    
    val_batch_size = X_val_RT.shape[0]
    Y_val_pred = model.predict(X_val_RT, batch_size=val_batch_size, verbose=1)  
    Y_val_pred = Y_val_pred[:, -1, :] # last item from each segment is the (ONLINE) final prediction
    Y_val_true = Y_val_RT[:, -1, :]
    X_val_RT_last = X_val_RT[:, -1, :]
    print X_val_RT.shape, X_val_RT_last.shape, Y_val_pred.shape, Y_val_true.shape
    
    # Save results: predictions will be saved in radians; for generation on robot
    # Raw and smoothed (low-pass 4Hz)
    from postprocessingutils import save_predictions_and_eval
    save_predictions_and_eval(save_results_path_prefix + 'MSBMvaltest_' + model_type, 
                     X_val_RT_last, Y_val_true, Y_val_pred, 'LSTM_SI', SEGMENT_LEN, val_VIDs, N_val_RT_seg, 
                             N_params=model.count_params(), N_epochs=N_epochs  )
    
    ###############################################################################################################
    # Evaluate on testing set: 
    # ONLINE TESTING (as if new timesteps arrive one-by-one)
    
    test_batch_size = X_test_RT.shape[0]
    Y_test_pred = model.predict(X_test_RT, batch_size=test_batch_size, verbose=1)
    Y_test_pred = Y_test_pred[:, -1, :] # last item from each segment is the (ONLINE) final prediction
    Y_test_true = Y_test_RT[:, -1, :]
    X_test_RT_last = X_test_RT[:, -1, :]
    print X_test_RT.shape, X_test_RT_last.shape, Y_test_pred.shape, Y_test_true.shape

    # Save results: predictions will be saved in radians; for generation on robot
    # Raw and smoothed (low-pass 4Hz)
    from postprocessingutils import save_predictions_and_eval
    save_predictions_and_eval(save_results_path_prefix + 'MSBMtest_' + model_type, 
                     X_test_RT_last, Y_test_true, Y_test_pred, 'LSTM_SI', SEGMENT_LEN, test_VIDs, N_test_RT_seg, 
                             N_params=model.count_params(), N_epochs=N_epochs )       
    
    print "\tTime taken: ", time.time()-st, (time.time()-st)/60.  

In [None]:
##################################################################################################
# OPTIONAL: Load already trained model?
##################################################################################################
from keras.models import load_model

# Path of a previously saved full-model checkpoint (.hdf5)
# NOTE(review): this path has no model-type sub-folder, unlike the checkpoints written
# by the training cell above -- confirm the file exists at this location.
test_model_name = './ModelCheckpoints/LSTM_SI/m_0045_0.0165_0.0185.hdf5'
model = load_model(test_model_name)
# summary() prints the table itself and returns None; wrapping it in print
# only added a stray "None" line to the output.
model.summary()

In [None]:
###############################################################################################################
# Testing (on RT segments, always): offline test (low-pass filt); online test (Kalman filter)
###############################################################################################################

from evalutils import eval_test, plot_predictions, inv_norm_Y

# ONLINE TESTING (as if new timesteps arrive one-by-one)
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/'
test_batch_size = X_test_RT.shape[0]

# Predict on test set
Y_test_pred = model.predict(X_test_RT, batch_size=test_batch_size, verbose=1)
Y_test_pred = Y_test_pred[:, -1, :] # last item from each segment is the (ONLINE) final prediction
Y_test_true = Y_test_RT[:, -1, :]
X_test_RT_last = X_test_RT[:, -1, :]
print X_test_RT.shape, X_test_RT_last.shape, Y_test_pred.shape, Y_test_true.shape

###############################################################################################################
# Save results: predictions will be saved in radians; for generation on robot
# Raw and smoothed (LPBF_4)
from postprocessingutils import save_predictions_and_eval
save_predictions_and_eval(save_results_path_prefix + 'test_1', 
                 X_test_RT_last, Y_test_true, Y_test_pred, 'LSTM_SI', SEGMENT_LEN, test_VIDs, N_test_RT_seg)


In [None]:
###############################################################################################################
# Show testing results
###############################################################################################################

import numpy as np
import time
import glob
from evalutils import show_test_results, plot_predictions

TE_folder = 'TrainingExamples_16kHz'
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/'
d = np.load(save_results_path_prefix + 'test_1.npz')

###############################################################################
# Show testing results: for raw Y and smoothed Y
print "===========================================\nRaw=====================\n"
show_test_results(d['results_raw'])
print "===========================================\nSmooth=====================\n"
show_test_results(d['results_smooth'])
print "================================================================\n"

###############################################################################
# Plot predictions (post-smoothed and raw) against ground truths and audio 
t_VID = 3 # test VID to show
if t_VID >= len(d['Y_raw_list']):
    raise ValueError("Required test VID is out of bounds!")
Y_true = d['Y_true_list'][t_VID]
Y_raw = d['Y_raw_list'][t_VID]
Y_smooth = d['Y_smooth_list'][t_VID]
test_VID = d['test_VIDs'][t_VID]

plot_predictions(Y_true, Y_raw, Y_smooth, 'LSTM_SI', angles_to_show='all', 
                     plot_start=1.0, plot_length=3.0, input_mode='time', SD_offset=None, 
                     test_VID=test_VID)