In [None]:
#####################################################################################
# Audio-driven upper-body motion synthesis on a humanoid robot
# Computer Science Tripos Part III Project
# Jan Ondras (jo356@cam.ac.uk), Trinity College, University of Cambridge
# 2017/18
#####################################################################################
# 10-fold subject-independent cross-validation of the LSTM-SI model
#####################################################################################

In [None]:
###############################################################################################################
# Using the LSTM-SI model, train&test on 10 folds (every time 2 test subjects and 2 validation subjects) 
# Then save the results by subjects
###############################################################################################################

import numpy as np
import time
import glob
# import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Masking, TimeDistributed
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

from geoutils import radToDeg, degToRad
from evalutils import norm_Y, inv_norm_Y
from postprocessingutils import save_predictions_and_eval2

N_folds = 10

SEGMENT_LEN = 300 # for evaluation (local cca)
AF_type = 'AF_logFB26_norm'
SEG_folder = 'Segments_logFB26'

np.random.seed(37) # for reproducibility
TE_folder = 'TrainingExamples_16kHz'
unique_srt_VIDs = unique_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_unique_srt_VIDs.npz')['unique_srt_VIDs'] # sorted VIDs
all_srt_VIDs = np.load('./../Dataset/'+TE_folder+'/te_VIDs.npz')['VIDs']
unique_srt_SIDs = np.array([x[:5] for i, x in enumerate(unique_srt_VIDs) if i % 2 == 0]) # ['PID02', 'PID05', ..

###
save_results_path_prefix = './../Dataset/'+TE_folder+'/Results/LSTM_SI/cvTest/'
model_checkpoint_path_prefix = './ModelCheckpoints/LSTM_SI/cvTest/'

PF = np.load('./../Dataset/'+TE_folder+'/te_PF_smooth_LPBF_4.0.npz')['PF_smooth_LPBF']
PF = PF[:, :11]
N_targets = PF.shape[1]
###########
# Target (Y) normalisation, into range 0-1 according to constraints
PF = norm_Y(PF)
print "Targets (Y) are TRANSFORMED to 0-1 range"

AF = np.load('./../Dataset/'+TE_folder+'/te_'+AF_type+'.npz')[AF_type]
N_features = AF.shape[1]

# Learning settings
N_epochs = 100
dropout = 0.
train_batch_size = 15000
N_LSTM_units = 12 # BEST FOUND in LSTM_SI

########################################################################
# For the given subject IDs (e.g. 'PID02'), get the corresponding VIDs (one per task)
def get_subjects_VIDs(SIDs):
    """Return the VIDs of the given subjects.

    For every subject ID in SIDs (e.g. 'PID02'), two VIDs are produced, in this order:
    SID + 'Task2' followed by SID + 'Task3'. Subjects keep the order they have in SIDs.

    Returns a flat list with 2 * len(SIDs) entries.
    """
    task_suffixes = ('Task2', 'Task3')
    return [subject + suffix for subject in SIDs for suffix in task_suffixes]

# For the given subjects, get the corresponding indices into the feature set and also their counts (per VID)
def get_subjects_indicies(SIDs):
    """Look up the positions in all_srt_VIDs that belong to the given subjects.

    For every subject ID in SIDs, the entries of the module-level array all_srt_VIDs equal to
    SID + 'Task2' are located first, then those equal to SID + 'Task3'.

    Returns a pair (indicies, indicies_cnts):
      indicies      -- flat list of all matching positions, grouped per VID in the order above
      indicies_cnts -- list with the number of matching positions of each VID (2 entries per subject)
    """
    positions = []
    positions_per_VID = []
    for subject in SIDs:
        # Both tasks of this subject, Task2 before Task3
        for task in ('Task2', 'Task3'):
            matches = np.argwhere(all_srt_VIDs == subject + task)[:,0]
            positions.extend( matches )
            positions_per_VID.append( len(matches) )
    return positions, positions_per_VID
# print frameCnts[26] + frameCnts[27] + frameCnts[0] + frameCnts[1] # checks
# print len(get_subjects_indicies(['PID20', 'PID02']))
########################################################################

#######################################################
# Dataset split

N_test_SIDs = 2
N_val_SIDs = 2
N_SIDs = 19
N_train_SIDs = N_SIDs - N_test_SIDs - N_val_SIDs
print "Dataset split in terms of subjects (train/val/test): ", 100.*N_train_SIDs/N_SIDs, "/", 100.*N_val_SIDs/N_SIDs, "/", 100.*N_test_SIDs/N_SIDs, "%"
print 

# Randomise the dataset split
permI = np.random.permutation(N_SIDs)

for fold_i in range(N_folds):
    
    print permI
    train_SIDs_mask = permI[:N_train_SIDs] 
    val_SIDs_mask =   permI[N_train_SIDs:N_train_SIDs+N_val_SIDs]
    test_SIDs_mask =  permI[N_train_SIDs+N_val_SIDs:] 
    
    print "Train SIDs",      unique_srt_SIDs[train_SIDs_mask]
    print "Valid SIDs",      unique_srt_SIDs[val_SIDs_mask]
    print "Testi SIDs",      unique_srt_SIDs[test_SIDs_mask]

    print "Train SIDs mask", train_SIDs_mask
    print "Valid SIDs mask", val_SIDs_mask
    print "Testi SIDs mask", test_SIDs_mask

    train_VIDs = get_subjects_VIDs(unique_srt_SIDs[train_SIDs_mask])
    val_VIDs   = get_subjects_VIDs(unique_srt_SIDs[val_SIDs_mask])
    test_VIDs  = get_subjects_VIDs(unique_srt_SIDs[test_SIDs_mask])

    print "Train VIDs", train_VIDs
    print "Valid VIDs", val_VIDs
    print "Testi VIDs", test_VIDs

    train_mask, train_VIDs_ind_cnts = get_subjects_indicies(unique_srt_SIDs[train_SIDs_mask])
    val_mask, val_VIDs_ind_cnts     = get_subjects_indicies(unique_srt_SIDs[val_SIDs_mask])
    test_mask, test_VIDs_ind_cnts   = get_subjects_indicies(unique_srt_SIDs[test_SIDs_mask])
    # print test_VIDs_ind_cnts

    print "Train/val/test set sizes: ", len(train_mask), "/", len(val_mask), "/", len(test_mask), " = ", len(train_mask) + len(val_mask) + len(test_mask)
    print "Dataset split in terms of #examples (train/val/test): ", 100.*len(train_mask)/len(all_srt_VIDs), "/", 100.*len(val_mask)/len(all_srt_VIDs), "/", 100.*len(test_mask)/len(all_srt_VIDs), "%"
    print 

    permI = np.roll(permI, 2) # ROTATE INDICES - FOR NEXT DATASET SPLIT
    
    Y_train = PF[train_mask]
    Y_val   = PF[val_mask]
    Y_test  = PF[test_mask]
    X_train = AF[train_mask]
    X_val   = AF[val_mask]
    X_test  = AF[test_mask]  
    
    train_VIDs_ind_cnts = np.array( train_VIDs_ind_cnts )
    val_VIDs_ind_cnts = np.array( val_VIDs_ind_cnts )
    test_VIDs_ind_cnts = np.array( test_VIDs_ind_cnts )
    
    ################################
    # Segment training sequences
    offset_f = 0 # frames
    offset_s = 0 # segments

    N_train_seg = train_VIDs_ind_cnts - SEGMENT_LEN + 1 # Array of number of segments per training VID; No padding

    X = np.zeros((np.sum(N_train_seg), SEGMENT_LEN, N_features))
    Y = np.zeros((np.sum(N_train_seg), SEGMENT_LEN, N_targets))

    for i, N_VID_frames in enumerate(train_VIDs_ind_cnts):

        for j in range(N_train_seg[i]):

            X[offset_s+j] = X_train[offset_f+j:offset_f+j+SEGMENT_LEN]
            Y[offset_s+j] = Y_train[offset_f+j:offset_f+j+SEGMENT_LEN]

        offset_f += N_VID_frames
        offset_s += N_train_seg[i]

    X_train = X
    Y_train = Y
    
    ################################
    # Segment validation sequences
    offset_f = 0 # frames
    offset_s = 0 # segments

    N_val_seg = val_VIDs_ind_cnts - SEGMENT_LEN + 1 # Array of number of segments per training VID; No padding

    X = np.zeros((np.sum(N_val_seg), SEGMENT_LEN, N_features))
    Y = np.zeros((np.sum(N_val_seg), SEGMENT_LEN, N_targets))

    for i, N_VID_frames in enumerate(val_VIDs_ind_cnts):

        for j in range(N_val_seg[i]):

            X[offset_s+j] = X_val[offset_f+j:offset_f+j+SEGMENT_LEN]
            Y[offset_s+j] = Y_val[offset_f+j:offset_f+j+SEGMENT_LEN]

        offset_f += N_VID_frames
        offset_s += N_val_seg[i]

    X_val = X
    Y_val = Y

    ################################
    # Segment testing sequences (for realtime testing: zero-pad segments at the beginning: #segments=#frames)
    offset = 0

    X = np.zeros((np.sum(test_VIDs_ind_cnts), SEGMENT_LEN, N_features)) # #segments=#frames
    Y = np.zeros((np.sum(test_VIDs_ind_cnts), SEGMENT_LEN, N_targets))

    for i, N_VID_frames in enumerate(test_VIDs_ind_cnts):

        for j in range(N_VID_frames):
            # Do zero-padding at the beginning
            if j < SEGMENT_LEN - 1:
                X[offset+j, SEGMENT_LEN - j - 1:] = X_test[offset:offset+j+1]
                Y[offset+j, SEGMENT_LEN - j - 1:] = Y_test[offset:offset+j+1]
            # Otherwise: as in the above section
            else:
                X[offset+j] = X_test[offset+j-SEGMENT_LEN+1:offset+j+1]
                Y[offset+j] = Y_test[offset+j-SEGMENT_LEN+1:offset+j+1]

        offset += N_VID_frames
        
    X_test_RT = X
    Y_test_RT = Y    
    
    #######################################################
    # Training
        
    st = time.time()

    ##########################
    # Final train & test
    
    # Create LSTM model
    model = Sequential()
    model.add( Masking(mask_value=0., input_shape=(SEGMENT_LEN, N_features)) )
    model.add( LSTM(N_LSTM_units, return_sequences=True) )  
    model.add( TimeDistributed(Dense(N_targets, activation='sigmoid')) )
    model.compile(loss='mean_squared_error', optimizer='adam')
    #print model.summary()

    # Set early stopping
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Checkpoint model weights and the model itself: at each epoch
    model_checkpoint_name = 'm_{:02d}'.format(i) + '_{epoch:04d}_{loss:.4f}_{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_checkpoint_path_prefix + model_checkpoint_name, monitor='val_loss', verbose=0, 
                                       save_best_only=True, save_weights_only=False, mode='auto', period=1)

    # Tain & validate
    hist = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=N_epochs, shuffle=True,  
                     batch_size=train_batch_size, verbose=0, 
                     callbacks=[early_stop, model_checkpoint])

    #######################################################
    # Testing & evaluation & saving results
    
    test_batch_size = X_test_RT.shape[0]
    Y_test_pred = model.predict(X_test_RT, batch_size=test_batch_size, verbose=1)
    Y_test_pred = Y_test_pred[:, -1, :] # last item from each segment is the (ONLINE) final prediction
    Y_test_true = Y_test_RT[:, -1, :]
    X_test_RT_last = X_test_RT[:, -1, :]
    #print X_test_RT.shape, X_test_RT_last.shape, Y_test_pred.shape, Y_test_true.shape

    # 1. test subject
    cnt = test_VIDs_ind_cnts[0] + test_VIDs_ind_cnts[1]
    
    # Avoid evaluation of PID23 twice
    if fold_i == N_folds - 1 and test_VIDs[0][:5] == 'PID23':
        pass
    else:
        save_predictions_and_eval2(save_results_path_prefix + 'test_' + test_VIDs[0][:5], 
                        X_test_RT_last[:cnt], Y_test_true[:cnt], Y_test_pred[:cnt], 'LSTM_SI', SEGMENT_LEN, test_VIDs[:2], 
                        test_VIDs_ind_cnts[:2], N_params=model.count_params(), N_epochs=len(hist.history['loss']))    

    # 2. test subject
    save_predictions_and_eval2(save_results_path_prefix + 'test_' + test_VIDs[2][:5], 
                    X_test_RT_last[cnt:], Y_test_true[cnt:], Y_test_pred[cnt:], 'LSTM_SI', SEGMENT_LEN, test_VIDs[2:], 
                    test_VIDs_ind_cnts[2:], N_params=model.count_params(), N_epochs=len(hist.history['loss']))    

    print "\tTime taken: ", time.time()-st, (time.time()-st)/60.

