In [None]:
#####################################################################################
# Audio-driven upper-body motion synthesis on a humanoid robot
# Computer Science Tripos Part III Project
# Jan Ondras (jo356@cam.ac.uk), Trinity College, University of Cambridge
# 2017/18
#####################################################################################
# Training and testing of the MLP-SD model (uses the best architecture found for MLP-SI)
#####################################################################################

In [None]:
#######################################################################################################
# Baseline MLP (assuming independence between timesteps)
# Subject-dependent
#######################################################################################################
#######################################################################################################
# FINAL TRAINING 
# & TESTING on VALIDATION SET (all feature sets)
# & TESTING on TESTING SET (best feature set only)
# Save trained models
# Train a model for each subject separately, using the best architecture for each feature set determined by MLP_SI
# No dropout
# DONE
#######################################################################################################

import numpy as np
import time
import glob

from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

from geoutils import radToDeg, degToRad
from evalutils import get_global_cca, get_local_cca, eval_test, norm_Y, inv_norm_Y, plot_predictions

SEGMENT_LEN = 300 # for evaluation (local cca)

tuning_types = [
    '1_35_AF13',
    '1_35_AF26', 
    '1_35_AF52', 
    '1_35_AF78'
]
AF_types = [
    'AF_MFCC13_norm',
    'AF_logFB26_norm',
    'AF_logFB52_norm',
    'AF_logFB78_norm'
]

np.random.seed(37) # for reproducibility

TE_folder = 'TrainingExamples_16kHz'
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/MLP_SD/XXX'
model_checkpoint_path_prefix = './ModelCheckpoints/MLP_SD/'

unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

PF = np.load('./../Dataset/'+TE_folder+'/te_PF_smooth_LPBF_4.0.npz')['PF_smooth_LPBF']
PF = PF[:, :11]
N_targets = PF.shape[1]
###########
# Target (Y) normalisation, into range 0-1 according to constraints
PF = norm_Y(PF)
print "Targets (Y) are TRANSFORMED to 0-1 range"

# Learning settings
epochs = 1000
dropout = 0.

st = time.time()
for tuning_type, AF_type in zip(tuning_types, AF_types):
    
    print TE_folder, AF_type, tuning_type
    if tuning_type[-2:] != AF_type.split('_')[1][-2:]:
        raise ValueError("Tuning type and audio feature type mismatch!")
        
        
    AF = np.load('./../Dataset/'+TE_folder+'/te_'+AF_type+'.npz')[AF_type]
    N_features = AF.shape[1]
    
    ###############################################
    # ARCHITECTURE settings: found on MLP_SI
    best_val_arch_path_prefix = './../Dataset/'+TE_folder+'/Results/MLP_SI/XXX'
    dd = np.load(best_val_arch_path_prefix + 'val_' + tuning_type + '.npz')
    best_N_hl = int(dd['best_N_hl'])
    best_N_hu = int(dd['best_N_hu'])
    print "\tOptimal number of hidden layers / hidden units: ", best_N_hl, " / ", best_N_hu
    
    # Iterate over subjects
    for s, SID in enumerate(unique_srt_SIDs):

        print SID

        #######################
        # Load the dataset split (for this SID); concat from both VIDs
        ds1 = np.load('./../Dataset/'+TE_folder+'/Dataset_split/split_masks_' + SID  + 'Task2.npz')
        ds2 = np.load('./../Dataset/'+TE_folder+'/Dataset_split/split_masks_' + SID  + 'Task3.npz')
        train_mask = np.concatenate( (ds1['train_mask'], ds2['train_mask']) )
        val_mask   = np.concatenate( (ds1['val_mask'],   ds2['val_mask']) )
        test_mask  = np.concatenate( (ds1['test_mask'],  ds2['test_mask']) )

        X_train = AF[train_mask]
        X_val   = AF[val_mask]
        X_test  = AF[test_mask]

        Y_train = PF[train_mask]
        Y_val   = PF[val_mask]
        Y_test  = PF[test_mask]

        train_batch_size = len(X_train)
        val_batch_size = len(X_val)
        test_batch_size = len(X_test)

        ##########################
        # Final train & test

        # Create model
        model = Sequential()
        model.add(Dense(best_N_hu, activation='relu', kernel_initializer='he_uniform', input_dim=N_features))
        #model.add(Dropout(dropout))
        for i in range(1, best_N_hl):
            model.add(Dense(best_N_hu, activation='relu', kernel_initializer='he_uniform'))
            #model.add(Dropout(dropout))
        model.add(Dense(N_targets, activation='sigmoid'))

        model.compile(loss='mean_squared_error', optimizer=Adam())
        #print model.summary()
        early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1) # stop after 10 epochs without improvement in val_acc

        # Checkpoint model weights and the model itself: at each epoch
        model_checkpoint_name = 'm_' + SID + '_{epoch:04d}_{loss:.4f}_{val_loss:.4f}.hdf5'
        model_checkpoint = ModelCheckpoint(model_checkpoint_path_prefix + tuning_type + '/' + model_checkpoint_name, monitor='val_loss', 
                                           verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)

        model.fit(X_train, Y_train, epochs=epochs, batch_size=train_batch_size, 
                   validation_data = (X_val, Y_val), verbose=0, callbacks=[early_stop, model_checkpoint])
        
        ###############################################################################################################
        # Evaluate on validation set
        Y_val_pred = model.predict(X_val, batch_size=val_batch_size, verbose=0)

        # Save results: predictions will be saved in radians; for generation on robot
        # Raw and smoothed 
        val_VIDs = [SID + 'Task2', SID + 'Task3']
        val_VIDs_ind_cnts = [len(ds1['val_mask']), len(ds2['val_mask'])]
        SD_offsets = [len(ds1['train_mask']), len(ds2['train_mask']) ]

        from postprocessingutils import save_predictions_and_eval
        save_predictions_and_eval(save_results_path_prefix + 'MSvaltest_' + SID + '_' + tuning_type, 
                                 X_val, Y_val, Y_val_pred, 'MLP_SD', SEGMENT_LEN, val_VIDs, val_VIDs_ind_cnts, 
                                 SD_offsets)
        
        ###############################################################################################################
        # Evaluate on testing set: only for 1_35_AF26
        if tuning_type == '1_35_AF26':
            Y_test_pred = model.predict(X_test, batch_size=test_batch_size, verbose=0)

            # Save results: predictions will be saved in radians; for generation on robot
            # Raw and smoothed 
            test_VIDs = [SID + 'Task2', SID + 'Task3']
            test_VIDs_ind_cnts = [len(ds1['test_mask']), len(ds2['test_mask'])]
            SD_offsets = [len(ds1['train_mask']) + len(ds1['val_mask']), len(ds2['train_mask']) + len(ds2['val_mask'])]

            from postprocessingutils import save_predictions_and_eval
            save_predictions_and_eval(save_results_path_prefix + 'MStest_' + SID + '_' + tuning_type, 
                                     X_test, Y_test, Y_test_pred, 'MLP_SD', SEGMENT_LEN, test_VIDs, test_VIDs_ind_cnts, 
                                     SD_offsets)

    print "\tTime taken: ", time.time()-st, (time.time()-st)/60. 
    

In [None]:
#######################################################################################################
# Re-Evaluate on VALIDATION & TEST SET using BEST MODEL
# DONE
#######################################################################################################

from keras.models import load_model

SEGMENT_LEN = 300 # for evaluation (local cca)

tuning_types = [
    '1_35_AF13',
    '1_35_AF26', 
    '1_35_AF52', 
    '1_35_AF78'
]
AF_types = [
    'AF_MFCC13_norm',
    'AF_logFB26_norm',
    'AF_logFB52_norm',
    'AF_logFB78_norm'
]

np.random.seed(37) # for reproducibility

TE_folder = 'TrainingExamples_16kHz'
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/MLP_SD/XXX'
model_checkpoint_path_prefix = './ModelCheckpoints/MLP_SD/'

unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

PF = np.load('./../Dataset/'+TE_folder+'/te_PF_smooth_LPBF_4.0.npz')['PF_smooth_LPBF']
PF = PF[:, :11]
N_targets = PF.shape[1]
###########
# Target (Y) normalisation, into range 0-1 according to constraints
PF = norm_Y(PF)
print "Targets (Y) are TRANSFORMED to 0-1 range"

# Learning settings
epochs = 1000
dropout = 0.

st = time.time()
for tuning_type, AF_type in zip(tuning_types, AF_types):
    
    print TE_folder, AF_type, tuning_type
    if tuning_type[-2:] != AF_type.split('_')[1][-2:]:
        raise ValueError("Tuning type and audio feature type mismatch!")
        
        
    AF = np.load('./../Dataset/'+TE_folder+'/te_'+AF_type+'.npz')[AF_type]
    N_features = AF.shape[1]
    
    # Iterate over subjects
    for s, SID in enumerate(unique_srt_SIDs):

        print SID

        #######################
        # Load the dataset split (for this SID); concat from both VIDs
        ds1 = np.load('./../Dataset/'+TE_folder+'/Dataset_split/split_masks_' + SID  + 'Task2.npz')
        ds2 = np.load('./../Dataset/'+TE_folder+'/Dataset_split/split_masks_' + SID  + 'Task3.npz')
        val_mask   = np.concatenate( (ds1['val_mask'],   ds2['val_mask']) )
        test_mask  = np.concatenate( (ds1['test_mask'],  ds2['test_mask']) )

        X_val   = AF[val_mask]
        X_test  = AF[test_mask]

        Y_val   = PF[val_mask]
        Y_test  = PF[test_mask]

        val_batch_size = len(X_val)
        test_batch_size = len(X_test)

        #######################
        # Load best model
        model_name = sorted(glob.glob( model_checkpoint_path_prefix + tuning_type + '/m_' + SID + '_*' ))[-1] # take best model = last checkpointed
        test_model_name = model_name
        print "Loading BEST model from:", test_model_name
        model = load_model(test_model_name)

        ###############################################################################################################
        # Evaluate on validation set
        Y_val_pred = model.predict(X_val, batch_size=val_batch_size, verbose=0)

        # Save results: predictions will be saved in radians; for generation on robot
        # Raw and smoothed 
        val_VIDs = [SID + 'Task2', SID + 'Task3']
        val_VIDs_ind_cnts = [len(ds1['val_mask']), len(ds2['val_mask'])]
        SD_offsets = [len(ds1['train_mask']), len(ds2['train_mask']) ]

        from postprocessingutils import save_predictions_and_eval
        save_predictions_and_eval(save_results_path_prefix + 'MSBMvaltest_' + SID + '_' + tuning_type, 
                                 X_val, Y_val, Y_val_pred, 'MLP_SD', SEGMENT_LEN, val_VIDs, val_VIDs_ind_cnts, 
                                 SD_offsets)
        
        ###############################################################################################################
        # Evaluate on testing set: only for 1_35_AF26
        if tuning_type == '1_35_AF26':
            Y_test_pred = model.predict(X_test, batch_size=test_batch_size, verbose=0)

            # Save results: predictions will be saved in radians; for generation on robot
            # Raw and smoothed 
            test_VIDs = [SID + 'Task2', SID + 'Task3']
            test_VIDs_ind_cnts = [len(ds1['test_mask']), len(ds2['test_mask'])]
            SD_offsets = [len(ds1['train_mask']) + len(ds1['val_mask']), len(ds2['train_mask']) + len(ds2['val_mask'])]

            from postprocessingutils import save_predictions_and_eval
            save_predictions_and_eval(save_results_path_prefix + 'MSBMtest_' + SID + '_' + tuning_type, 
                                     X_test, Y_test, Y_test_pred, 'MLP_SD', SEGMENT_LEN, test_VIDs, test_VIDs_ind_cnts, 
                                     SD_offsets)

    print "\tTime taken: ", time.time()-st, (time.time()-st)/60. 


In [None]:
#######################################################################################################
# Overall testing statistics over all subjects (subject-dependent case)
# ON VALIDATION SET
# DONE
#######################################################################################################
import numpy as np
import time
import glob
from geoutils import radToDeg
from evalutils import show_test_results

BM = True     # use best model, not last model
# BM = False

tuning_types = [
    '1_35_AF13',
    '1_35_AF26', 
    '1_35_AF52', 
    '1_35_AF78'
]

TE_folder = 'TrainingExamples_16kHz'

save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/MLP_SD/XXX'

unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

for i, tuning_type in enumerate(tuning_types):
    print tuning_type
    
    rmse_overall_list = []
    local_cca_YtYp = [] 
    local_cca_XYt = [] 
    local_cca_XYp = [] 
    jerkiness_true = []
    jerkiness_pred = []
    losses = []

    for s, SID in enumerate(unique_srt_SIDs):

        #print SID
        if BM:
            d = np.load(save_results_path_prefix + 'MSBMvaltest_' + SID + '_' + tuning_type + '.npz')
        else:
            d = np.load(save_results_path_prefix + 'MSvaltest_' + SID + '_' + tuning_type + '.npz')

        ###############################################################################
        # Show testing results: for raw Y and smoothed Y
        #print "===========================================Raw=====================\n"
        #show_test_results(d['results_raw'])
        #print "===========================================Smooth=====================\n"
        #show_test_results(d['results_smooth'])
        #print "================================================================\n"

        ###############################################################################
        # Plot predictions (post-smoothed and raw) against ground truths and audio 
    #     t_VID = 0 # test VID to show (only 0 or 1 in subject dependent case)
    #     if t_VID >= len(d['Y_raw_list']):
    #         raise ValueError("Required test VID is out of bounds!")
    #     Y_true = d['Y_true_list'][t_VID]
    #     Y_raw = d['Y_raw_list'][t_VID]
    #     Y_smooth = d['Y_smooth_list'][t_VID]
    #     test_VID = d['test_VIDs'][t_VID]
    #     SD_offset = d['SD_offsets'][t_VID]

    #     plot_predictions(Y_true, Y_raw, Y_smooth, 'MLP_SD', angles_to_show='all', 
    #                          plot_start=SD_offset + 0, plot_length=300, input_mode='samples', SD_offset=SD_offset, 
    #                          test_VID=test_VID)

        # Calculate overall measures
        #print d['results_smooth'][1][0]
        local_cca_YtYp.append( d['results_smooth'][3]['YtYp'] )
        local_cca_XYt.append( d['results_smooth'][3]['XYt'] )
        local_cca_XYp.append( d['results_smooth'][3]['XYp'] )
        #print d['results_smooth'][3]['YtYp']
        rmse_overall_list.append( d['results_smooth'][1][0] ) # take only means, not stds

        jerkiness_true.append( np.sum(d['results_smooth'][4]['true']) )
        jerkiness_pred.append( np.sum(d['results_smooth'][4]['pred']) )
        
        losses.append( d['test_loss'] )
    
    print "======================OVERALL=====================\n"
    print "===========================================Smooth=====================\n"
    print "Mean RMSE_overall: ", np.mean(rmse_overall_list, axis=0), "(deg)"
    print 

    print "Local CCA XYt: ", np.mean(local_cca_XYt, axis=0)[0], " +/- ", np.mean(local_cca_XYt, axis=0)[1]
    print "Local CCA XYp: ", np.mean(local_cca_XYp, axis=0)[0], " +/- ", np.mean(local_cca_XYp, axis=0)[1]
    print "Local CCA delta", abs( np.mean(local_cca_XYt, axis=0)[0] - np.mean(local_cca_XYp, axis=0)[0] )
    print "Local CCA YtYp: ", np.mean(local_cca_YtYp, axis=0)[0], " +/- ", np.mean(local_cca_YtYp, axis=0)[1]
    print 
    print "Jerkiness (true): ", np.mean(jerkiness_true, axis=0)    # mean per subject
    print "Jerkiness (pred): ", np.mean(jerkiness_pred, axis=0)
    print 
    print "Loss:", np.mean(losses)
    print 


In [None]:
#######################################################################################################
# Overall testing statistics over all subjects (subject-dependent case)
# ON TESTING SET (best model, feature set 1_35_AF26 only)
# DONE
#######################################################################################################
import numpy as np
import time
import glob
from geoutils import radToDeg
from evalutils import show_test_results

tuning_type = '1_35_AF26'

TE_folder = 'TrainingExamples_16kHz'

save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/MLP_SD/XXX'

unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

print tuning_type

rmse_overall_list = []
local_cca_YtYp = [] 
local_cca_XYt = [] 
local_cca_XYp = [] 
jerkiness_true = []
jerkiness_pred = []
losses = []

for s, SID in enumerate(unique_srt_SIDs):

    #print SID
    d = np.load(save_results_path_prefix + 'MSBMtest_' + SID + '_' + tuning_type + '.npz')

    ###############################################################################
    # Show testing results: for raw Y and smoothed Y
    #print "===========================================Raw=====================\n"
    #show_test_results(d['results_raw'])
    #print "===========================================Smooth=====================\n"
    #show_test_results(d['results_smooth'])
    #print "================================================================\n"

    ###############################################################################
    # Plot predictions (post-smoothed and raw) against ground truths and audio 
#     t_VID = 0 # test VID to show (only 0 or 1 in subject dependent case)
#     if t_VID >= len(d['Y_raw_list']):
#         raise ValueError("Required test VID is out of bounds!")
#     Y_true = d['Y_true_list'][t_VID]
#     Y_raw = d['Y_raw_list'][t_VID]
#     Y_smooth = d['Y_smooth_list'][t_VID]
#     test_VID = d['test_VIDs'][t_VID]
#     SD_offset = d['SD_offsets'][t_VID]

#     plot_predictions(Y_true, Y_raw, Y_smooth, 'MLP_SD', angles_to_show='all', 
#                          plot_start=SD_offset + 0, plot_length=300, input_mode='samples', SD_offset=SD_offset, 
#                          test_VID=test_VID)

    # Calculate overall measures
    #print d['results_smooth'][1][0]
    local_cca_YtYp.append( d['results_smooth'][3]['YtYp'] )
    local_cca_XYt.append( d['results_smooth'][3]['XYt'] )
    local_cca_XYp.append( d['results_smooth'][3]['XYp'] )
    #print d['results_smooth'][3]['YtYp']
    rmse_overall_list.append( d['results_smooth'][1][0] ) # take only means, not stds

    jerkiness_true.append( np.sum(d['results_smooth'][4]['true']) )
    jerkiness_pred.append( np.sum(d['results_smooth'][4]['pred']) )

    losses.append( d['test_loss'] )

print "======================OVERALL=====================\n"
print "===========================================Smooth=====================\n"
print "Mean RMSE_overall: ", np.mean(rmse_overall_list, axis=0), "(deg)"
print 

print "Local CCA XYt: ", np.mean(local_cca_XYt, axis=0)[0], " +/- ", np.mean(local_cca_XYt, axis=0)[1]
print "Local CCA XYp: ", np.mean(local_cca_XYp, axis=0)[0], " +/- ", np.mean(local_cca_XYp, axis=0)[1]
print "Local CCA delta", abs( np.mean(local_cca_XYt, axis=0)[0] - np.mean(local_cca_XYp, axis=0)[0] )
print "Local CCA YtYp: ", np.mean(local_cca_YtYp, axis=0)[0], " +/- ", np.mean(local_cca_YtYp, axis=0)[1]
print 
print "Jerkiness (true): ", np.mean(jerkiness_true, axis=0)    # mean per subject
print "Jerkiness (pred): ", np.mean(jerkiness_pred, axis=0)
print "Jerkiness (delta): ", abs( np.mean(jerkiness_pred, axis=0) - np.mean(jerkiness_true, axis=0) )
print 
print "Loss:", np.mean(losses)
print 

    

In [None]:
# Copy the best model for each subject
# For MLP SD
# DONE
import numpy as np
import time
import glob
from shutil import copy2

model_checkpoint_path_prefix = './ModelCheckpoints/MLP_SD/'

TE_folder = 'TrainingExamples_16kHz'
unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs']
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

tuning_types = [
    '1_35_AF13',
    '1_35_AF26', 
    '1_35_AF52', 
    '1_35_AF78'
]

for tuning_type in tuning_types:

    for s, SID in enumerate(unique_srt_SIDs):
        test_model_name = sorted(glob.glob(model_checkpoint_path_prefix + tuning_type + '/m_' + SID + '_*'))[-1]   
        new_name = model_checkpoint_path_prefix + tuning_type + '_' + test_model_name.split('/')[-1]
        copy2(test_model_name, new_name)
        print new_name
