In [None]:
#######################################################################################################################
# Project: Deep Virtual Rapport Agent (head gesture detector)
#
#     Jan Ondras (jo951030@gmail.com)
#     Institute for Creative Technologies, University of Southern California
#     April-October 2019
#
#######################################################################################################################
#
#     Test the final nod, shake, and tilt detector models trained on the whole 4comb dataset on other datasets
#
#     Namely, cross-dataset testing on the ccdb dataset. 
#     Requires the segmented ccdb datasets to be generated beforehand (datasets_scripts/ccdb/generate_dataset.ipynb).
#
#     This tests the nod, shake, and tilt models independently. 
#     For testing of a fused 4-class (none/nod/shake/tilt) HGD model see the script ./evaluate_fused_final_4comb_hgd.ipynb
#
#######################################################################################################################

In [1]:
###########################################################
# Reproducibility: seed the NumPy and TensorFlow random number generators with the same
# fixed value. This is done before any Keras module is imported further below.
import numpy as np
random_seed = 37
np.random.seed(random_seed)
from tensorflow import set_random_seed
set_random_seed(random_seed)
###########################################################

# Set CUDA_VISIBLE_DEVICES before Keras is imported.
# NOTE(review): presumably this restricts the process to the GPU with index 0 -- confirm on the target machine.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Keras model loading, project helpers (utils), and standard-library utilities used by the cells below.
from keras.models import Sequential, load_model
# from keras_tqdm import TQDMNotebookCallback
from utils import load_train_history, save_train_history, plot_loss_history, arch_to_str, evaluate_custom_metrics
from collections import defaultdict
import pickle
from keras import backend as K
import glob
import time
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [2]:
#######################################################################################################################
# For a given head gesture: evaluate the final model on the test dataset (ccdb) and save the results as one .pkl file
#
# Saved to the folder ./test_results/test_results_final_4comb_{S,nonS}
# Filenames naming convention:
#     test_results_final_4comb_{S,nonS}_{head_gesture}_{window_size}ws_{number_of_features}f[_A1]_{model_architecture}u.pkl
#
# Relies on names imported in the first code cell: np, os, time, pickle, defaultdict, K, load_model,
# arch_to_str, evaluate_custom_metrics.
#######################################################################################################################

#############################################
# Smooth the predicted labels?
#     True  -> name infix *_S_*
#     False -> name infix *_nonS_*
smooth_labels = True
#############################################

# GRU architecture of the final model for each head gesture.
GRU_ARCH_BY_GESTURE = {
    'nod':   [16],
    'shake': [8],
    'tilt':  [16],
}

# Select the head gesture to evaluate: 'nod', 'shake', or 'tilt'.
HEAD_GESTURE = 'nod'
GRU_ARCH = GRU_ARCH_BY_GESTURE[HEAD_GESTURE]

WINDOW_SIZE = 32
N_FEATURES = 12
# Batch size used only for prediction.
PREDICT_BATCH_SIZE = 10000

dataset_type = f'{WINDOW_SIZE}ws_{N_FEATURES}f'
# Model trained with augmentation (infix _A1_); for the non-augmented model use:
#     model_type = f'{dataset_type}_{arch_to_str(GRU_ARCH)}u'
model_type = f'{dataset_type}_A1_{arch_to_str(GRU_ARCH)}u'

metrics_names = ['bacc', 'f1', 'precision', 'recall']
voting_strategies = ['last', 'majority']
train_dataset_name = 'final_4comb'
test_dataset_names = ['ccdb']

datasets_path_prefix = '/home/ICT2000/jondras/dvra_datasets'
checkpoints_path_prefix = '/home/ICT2000/jondras/deep-virtual-rapport-agent/head_gesture_detector/checkpoints/final_4comb'

test_results_type = 'test_results_final_4comb_S' if smooth_labels else 'test_results_final_4comb_nonS'
# exist_ok avoids the check-then-create race and makes the cell safely re-runnable.
os.makedirs(f'./test_results/{test_results_type}', exist_ok=True)

# Test results are saved as dictionary:
# test_results[train_dataset_name][test_dataset_name][voting_strategy][metric_name]
test_results = {train_dataset_name: dict()}

# Just one final model
model_path = f'{checkpoints_path_prefix}/{train_dataset_name}_{HEAD_GESTURE}_{model_type}.hdf5'

start_time = time.time()
# Start from a clean Keras session so that re-running the cell does not stack models in the backend.
K.clear_session()
best_model = load_model(model_path)
print(f'Loading model:\n\t{model_path.split("/")[-1]} \t #params: {best_model.count_params()}\n')

# Iterate over datasets to test the model
for test_dataset_name in test_dataset_names:

    test_results[train_dataset_name][test_dataset_name] = {vs: defaultdict(list) for vs in voting_strategies}

    # Load testing data; the context manager closes the underlying .npz archive once the arrays are read.
    print(f'\tTesting on {test_dataset_name}')
    test_data_path = (f'{datasets_path_prefix}/{test_dataset_name}/segmented_datasets/'
                      f'{test_dataset_name}_{HEAD_GESTURE}_{dataset_type}.npz')
    with np.load(test_data_path) as data:
        X_test, Y_test = data['X_test'], data['Y_test']
        test_chunk_lens = data['test_len']

    # Predict class labels and score them with the project's custom metrics (one result per voting strategy).
    Y_pred = best_model.predict_classes(X_test, batch_size=PREDICT_BATCH_SIZE)
    test_metrics = evaluate_custom_metrics(Y_true=Y_test,
                                           Y_pred=Y_pred,
                                           chunk_lens=test_chunk_lens, window_size=WINDOW_SIZE,
                                           smooth=smooth_labels
                                          )

    for vs in voting_strategies:
        for mn in metrics_names:
            test_results[train_dataset_name][test_dataset_name][vs][mn].append( test_metrics[vs][mn] )
            print(f'\t\t[{vs}] {mn}: \t\t {test_metrics[vs][mn]:.4f}')
        print()
    print()

print(f'\t\t Total time taken: {time.time() - start_time} s\n')

# Save test results (from all testing of the current model on all datasets)
save_results_path = f'./test_results/{test_results_type}/{test_results_type}_{HEAD_GESTURE}_{model_type}.pkl'
with open(save_results_path, 'wb') as pickle_filehandler:
    pickle.dump(test_results, pickle_filehandler)
print(f'Saved to {save_results_path}\n')


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Loading model:
	final_4comb_nod_32ws_12f_A1_16u.hdf5 	 #params: 1409

	Testing on ccdb
		[last] bacc: 		 0.7948
		[last] f1: 		 0.2187
		[last] precision: 		 0.1248
		[last] recall: 		 0.8842

		[majority] bacc: 		 0.7984
		[majority] f1: 		 0.2210
		[majority] precision: 		 0.1262
		[majority] recall: 		 0.8891


		 Total time taken: 12.261885643005371 s

Saved to ./test_results_final_4comb_S/test_results_final_4comb_S_nod_32ws_12f_A1_16u.pkl



In [5]:
#########################################################################################
# Print testing summary: final 4comb models
#
# Reads every result file in ./test_results/test_results_final_4comb_{S,nonS} and prints
# the stored metrics for each (train dataset, test dataset) pair found in the file.
#########################################################################################
import glob
import pickle
from tabulate import tabulate
from utils import arch_to_str
import numpy as np

#############################################
# Smooth the predicted labels?
#     True  -> name infix *_S_*
#     False -> name infix *_nonS_*
smooth_labels = True
#############################################

metrics_names = ['bacc', 'f1', 'precision', 'recall']
voting_strategies = ['last', 'majority']

test_results_type = 'test_results_final_4comb_S' if smooth_labels else 'test_results_final_4comb_nonS'

for save_results_path in sorted(glob.glob(f'./test_results/{test_results_type}/{test_results_type}_*.pkl')):
    print(save_results_path.split('/')[-1])
    # NOTE: pickle.load can execute arbitrary code; only load result files produced by this project.
    with open(save_results_path, 'rb') as pickle_filehandler:
        test_results = pickle.load(pickle_filehandler)

    # Results are stored as test_results[train_dataset_name][test_dataset_name][voting_strategy][metric_name].
    # Iterate over the dataset names actually present in the file instead of hard-coding them, so the
    # summary keeps working for whatever train/test combination the evaluation cell saved.
    for train_dataset_name, results_per_test_dataset in test_results.items():
        for test_dataset_name, results_per_strategy in results_per_test_dataset.items():
            print(f'\tTrained on {train_dataset_name}, tested on {test_dataset_name}')
            for vs in voting_strategies:
                for mn in metrics_names:
                    # Each metric is stored as a list; this notebook appends a single value per model.
                    print(f'\t\t[{vs}] {mn}: \t\t {results_per_strategy[vs][mn][0]:.4f}')
                print()
            print()
            

test_results_4comb_S_nod_32ws_12f_16u.pkl
		[last] bacc: 		 0.8422
		[last] f1: 		 0.5045
		[last] precision: 		 0.3628
		[last] recall: 		 0.8278

		[majority] bacc: 		 0.8494
		[majority] f1: 		 0.5195
		[majority] precision: 		 0.3771
		[majority] recall: 		 0.8347


test_results_4comb_S_shake_32ws_12f_8u.pkl
		[last] bacc: 		 0.8558
		[last] f1: 		 0.5003
		[last] precision: 		 0.3586
		[last] recall: 		 0.8275

		[majority] bacc: 		 0.8371
		[majority] f1: 		 0.4944
		[majority] precision: 		 0.3613
		[majority] recall: 		 0.7826


test_results_4comb_S_tilt_32ws_12f_16u.pkl
		[last] bacc: 		 0.7409
		[last] f1: 		 0.2470
		[last] precision: 		 0.1538
		[last] recall: 		 0.6267

		[majority] bacc: 		 0.7537
		[majority] f1: 		 0.2580
		[majority] precision: 		 0.1609
		[majority] recall: 		 0.6499


