In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from keras.models import load_model

from utils.custom_loss import mae_wrap_angle, mse_wrap_angle, angle_diff_deg
from utils.load_data_raw import DataGenerator_raw, load_raw_all_h5
from utils.dataset_split import *
from utils.eval import *
from utils.plot import *
from utils.utils import get_filelist, open_json

# Worker count constant (presumably for generator-based prediction — TODO confirm usage).
NUM_WORKER = 4

# Custom loss functions, keyed by name, handed to load_model as `custom_objects`
# so that saved models referring to them can be deserialized.
custom_obj = dict(
    mse_wrap_angle=mse_wrap_angle,
    mae_wrap_angle=mae_wrap_angle,
)

In [None]:
# load filelist
# (file names returned by get_filelist for the trained-model directory)
model_dir = '../data/models_trained/'
filelist = get_filelist(model_dir)

# load data
# NOTE(review): absolute, machine-specific path — the notebook only runs on a machine
# with this exact mount point; consider a configurable data directory.
dset_dir = '/media/feliximmohr/Storage/master_thesis/generated/database/raw/raw_nf10_mid/database_raw_nf10_scaledMM.h5'
feat, targ, ID_ref, pos_t, cond_t, par = load_raw_all_h5(dset_dir)

# load unscaled data
# (only features and targets are kept; the other four return values are discarded)
dset_nsc_dir = '/media/feliximmohr/Storage/master_thesis/generated/database/raw/raw_nf10_mid/database_raw_nf10.h5'
feat_nsc, targ_nsc, _, _, _, _ = load_raw_all_h5(dset_nsc_dir)

# create parameter dicts for the test batch generators
# (one for the scaled and one for the unscaled data; both reuse `par` from the scaled file)
params = create_test_params(feat, targ, par, shuffle=False)
params_nsc = create_test_params(feat_nsc, targ_nsc, par, shuffle=False)

# feature column names
cn = feat.columns.tolist()

# Build one entry per row of cond_t: the key is the row's first element
# (e.g. 'NFCHOA_R006', which is used as a key further down) and the value is
# whatever get_subset_sample_idx returns for that single condition.
part = {}
for i in range(len(cond_t)):
    cond = cond_t.iloc[i]
    cond_ids_test, pos_ids_test, subject_ids_test = get_subset_ids([cond[0]], cond_t)
    part[cond[0]] = get_subset_sample_idx(ID_ref, cond_ids_test, pos_ids_test, subject_ids_test)

In [None]:
# extract model and corresponding history and test partition filenames from filelist
# h_flist / pt_flist are aligned index-by-index with m_flist; the placeholder '_'
# marks a model for which no history / test-partition file was found.
m_flist = [x for x in filelist if ('history' not in x) and ('partition_test' not in x)]
h_flist = ['_'] * len(m_flist)
pt_flist = ['_'] * len(m_flist)

# Position of every model file name, so side-car files can be matched without
# repeated linear searches (first occurrence wins, exactly like list.index).
m_pos = {}
for idx, name in enumerate(m_flist):
    m_pos.setdefault(name, idx)

for file in filelist:
    if 'history' in file:
        name = file.replace('_history.json', '.h5')
        if name in m_pos:
            h_flist[m_pos[name]] = file
        else:
            # a left-over history file must not abort the whole cell
            print('skipping history file without matching model:', file)
    if 'partition_test' in file:
        name = file.replace('_partition_test.json', '.h5')
        if name in m_pos:
            pt_flist[m_pos[name]] = file
        else:
            # same for a left-over test-partition file
            print('skipping test partition file without matching model:', file)

# separate model list based on test type (topology/wrapping/normalization)
m_flist_tt = [x for x in m_flist if 'toptest' in x]
m_flist_nt = [x for x in m_flist if 'normtest' in x]
m_flist_wt = [x for x in m_flist if 'wraptest' in x]

# history file names in the same order as the per-test-type model lists
h_flist_tt = [h_flist[m_pos[x]] for x in m_flist_tt]
h_flist_nt = [h_flist[m_pos[x]] for x in m_flist_nt]
h_flist_wt = [h_flist[m_pos[x]] for x in m_flist_wt]

# test-partition file names in the same order as the per-test-type model lists
pt_flist_tt = [pt_flist[m_pos[x]] for x in m_flist_tt]
pt_flist_nt = [pt_flist[m_pos[x]] for x in m_flist_nt]
pt_flist_wt = [pt_flist[m_pos[x]] for x in m_flist_wt]

In [None]:
def print_filelist(flist):
    """Print one or more parallel file lists, each entry prefixed with its index.

    Parameters
    ----------
    flist : list or tuple of lists
        A single list of names, or a tuple of equally long lists. For a tuple,
        the entries sharing an index are printed on consecutive lines.

    Notes
    -----
    The index is left-justified so that the names line up; the column is at
    least two characters wide and grows with the number of entries. An empty
    tuple prints nothing.
    """
    if not isinstance(flist, tuple):
        flist = (flist,)
    if not flist:
        # nothing to print; avoids indexing into an empty tuple
        return

    n_rows = len(flist[0])
    # width of the largest index, but never narrower than two characters
    width = max(2, len(str(n_rows - 1)))
    for i in range(n_rows):
        prefix = '{:<{w}} -'.format(i, w=width)
        for column in flist:
            print(prefix, column[i])

# print model list
#print_filelist((m_flist,h_flist,pt_flist))

In [None]:
def load_m_h_pt(model_dir, m_flist, h_flist, pt_flist):
    """Load the listed models together with their history and test-partition files.

    For every entry of ``m_flist`` the model is read with ``load_model`` (using
    the notebook-level ``custom_obj``). The entries of ``h_flist`` and
    ``pt_flist`` at the same index are read with ``open_json`` unless they are
    the placeholder ``'_'``, which is passed through unchanged.

    Returns a tuple ``(models, histories, partitions)`` of three lists, each
    aligned with ``m_flist``.
    """
    models = []
    histories = []
    partitions = []
    for idx, model_file in enumerate(m_flist):
        models.append(load_model(model_dir + model_file, custom_objects=custom_obj))

        history_file = h_flist[idx]
        histories.append('_' if history_file == '_' else open_json(model_dir, history_file))

        partition_file = pt_flist[idx]
        partitions.append('_' if partition_file == '_' else open_json(model_dir, partition_file))
    return models, histories, partitions

# load models
# Only the 'toptest' models are loaded here; the history and test-partition
# results returned alongside them are discarded. The normalization ('normtest')
# and wrapping ('wraptest') variants are left commented out.
#m_nt, h_nt, pt_nt = load_m_h_pt(model_dir, m_flist_nt, h_flist_nt, pt_flist_nt)
#m_wt, h_wt, pt_wt = load_m_h_pt(model_dir, m_flist_wt, h_flist_wt, pt_flist_wt)
m_tt, _, _ = load_m_h_pt(model_dir, m_flist_tt, h_flist_tt, pt_flist_tt)

## Prediction Evaluation

In [None]:
# List the 'toptest' model files with their index; the same order applies to m_tt.
print_filelist(m_flist_tt)

In [None]:
#part_x_M027, pos_ids = get_pos_IDs(part['NFCHOA_M027'],ID_ref)
#part_x_R006, _ = get_pos_IDs(part['NFCHOA_R006'],ID_ref)

# Generator settings used for prediction: copies of the base test parameters
# (scaled / unscaled data) with the batch size set to 1000.
params_t = {**params, 'batch_size': 1000}
params_t_nsc = {**params_nsc, 'batch_size': 1000}

# Same settings, but built from the first 64 feature columns only
# (selected for models whose file name contains 'no-ic').
params_t_nic = create_test_params(
    feat[cn[:64]],
    targ,
    par,
    batch_size=1000,
    shuffle=False,
)

In [None]:
def model_pred_pos(model, part, params, ID_ref):
    """Predict with ``model`` separately for every group of a test partition.

    The sample IDs in ``part`` are grouped with ``get_pos_IDs`` (per position).
    For each group a ``DataGenerator_raw`` is built from ``params`` and used
    both for the prediction and for collecting the matching targets through
    ``get_y_gen``.

    Parameters
    ----------
    model : loaded model offering ``predict_generator``.
    part : test partition as stored in the notebook's ``part`` dict.
    params : dict of keyword arguments forwarded to ``DataGenerator_raw``.
    ID_ref : ID reference passed through to ``get_pos_IDs``.

    Returns
    -------
    pred : list with one prediction result per group.
    y : list with the generator targets of the same groups, in the same order.
    """
    # the second return value of get_pos_IDs is not needed here
    part_x, _ = get_pos_IDs(part, ID_ref)
    pred = []
    y = []
    for j in range(len(part_x)):
        # one generator per group, shared by prediction and target extraction
        b_gen = DataGenerator_raw(part_x[j], **params)
        pred.append(model.predict_generator(b_gen,
                                            verbose=0,
                                            use_multiprocessing=True,
                                            workers=NUM_WORKER))
        y.append(get_y_gen(b_gen))
    return pred, y

In [None]:
# Predictions and targets per model file name, one pair of dicts per test condition.
pred_R006, pred_M027 = {}, {}
y_R006, y_M027 = {}, {}

for i, md in enumerate(m_flist_tt):
    # Pick the generator parameters from the model file name: the first tag
    # found wins ('no-ic' before 'nsc'); without a tag the default set is used.
    params_tmp = next(
        (p for tag, p in (('no-ic', params_t_nic), ('nsc', params_t_nsc)) if tag in md),
        params_t,
    )

    pred_R006[md], y_R006[md] = model_pred_pos(m_tt[i], part['NFCHOA_R006'], params_tmp, ID_ref)
    pred_M027[md], y_M027[md] = model_pred_pos(m_tt[i], part['NFCHOA_M027'], params_tmp, ID_ref)
    # progress: index of the model just finished
    print(i)

In [None]:
# Model file names in insertion order, used below to address results by index.
k = list(pred_M027)
k[6]

In [None]:
# generate plots

# all positions overview
# Only the models from index 19 onwards are plotted, and only for NFCHOA_M027
# (the NFCHOA_R006 call is commented out).
# NOTE(review): model_name / loss_name / special stay defined after the loop,
# holding the values of the last model iterated.
for name in k[19:]:
    model_name, loss_name, tdata, special, bs, _ = get_model_info(name)
    #plot_locaz_all(pred_R006[name],
    #               l=True,
    #               title='Model: {}{} | Loss: {} | Test-Data: {}'.format(model_name, special, loss_name, 'NFCHOA_R006'))
    plot_locaz_all(pred_M027[name],
                   l=True,
                   title='Model: {}{} | Loss: {} | Test-Data: {}'.format(model_name, special, loss_name, 'NFCHOA_M027'))

In [None]:
# individual positions incl. gt comparison
%matplotlib

m_idx = 8
pos_idx = 0

title = 'Model: {}{} | Loss: {} | Test-Data: {}\n Position: {}'.format(model_name, special, loss_name, 'NFCHOA_M027', pos_idx)
f, ax_p, ax_y = plot_locaz(pred_M027[k[m_idx]][pos_idx], y_M027[k[m_idx]][pos_idx], l=True)
f.suptitle(title, fontsize='x-large')
lines = (ax_p.get_children()[:1][0],ax_y.get_children()[:1][0],ax_p.get_children()[:][1],ax_p.get_children()[:][2],ax_p.get_children()[:][4])
f.legend(lines, ('predictions', 'human / gt', 'mean over subjects/repititions at 0° head rotation', '± 180°','± 180°'))

In [None]:
# Scratch inspection of single batches and result slices for NFCHOA_M027.
tpart_x, tpos_ids = get_pos_IDs(part['NFCHOA_M027'],ID_ref)
# generator over entry 3 of tpart_x (presumably one position — TODO confirm)
t_gen = DataGenerator_raw(tpart_x[3], **params_t)
# features and targets of batch 33
tX,ty = t_gen.__getitem__(33)

# 1000-sample window (33000:34000) of targets / predictions for model k[9], list entry 9
test = y_M027[k[9]][9][36000-3000:36000-2000]
tpred = pred_M027[k[9]][9][36000-3000:36000-2000]

# mean target over each of the first five consecutive 200-sample chunks of the batch
ty_m = np.zeros(5)
for i in range(5):
    ty_m[i] = np.mean(ty[i*200:i*200+200])
    
# display the first 200-sample chunk of targets as the cell output
i = 0
ty[i*200:i*200+200]

In [None]:
# Scratch: 200-sample window (37800:38000) of predictions / targets for model k[6], list entry 3.
tttt = pred_M027[k[6]][3][37800:38000]
ttty = y_M027[k[6]][3][37800:38000]

# features and targets of batch 37 from the generator created in the previous cell
tX,ty = t_gen.__getitem__(37)

In [None]:
# Show tX (the first element returned by the generator for the last fetched batch) as an image.
plt.imshow(tX[:,:], aspect='auto')

In [None]:
# Partial dependence of model m_tt[9] on the first 64 features.
# NOTE(review): ttX is first assigned in a later cell (the batch-collection loop),
# so running the notebook top to bottom on a fresh kernel fails here with a NameError.
# NOTE(review): m_tt[9] was loaded with keras' load_model; confirm that
# plot_partial_dependence accepts such a model and that the function exists in
# the installed scikit-learn version — TODO confirm.
from sklearn.inspection import plot_partial_dependence
plot_partial_dependence(m_tt[9], ttX, np.arange(64), feature_names=cn,
                        n_jobs=1, grid_resolution=50)

In [None]:
# Per-position predictions and targets of model 5 on NFCHOA_R006, using the
# generator parameters built from the unscaled data.
tpp, tyy = model_pred_pos(m_tt[5] , part['NFCHOA_R006'], params_t_nsc, ID_ref)

In [None]:
# Plot predictions against targets for list entry 5 of the result computed in the previous cell.
plot_locaz(tpp[5], tyy[5], l=True)

In [None]:
# Split with NFCHOA_R300 as the test subset; the validation share is set to a
# tiny non-zero value (1e-9).
test_subset = ['NFCHOA_R300']
partition = create_split(
    ID_ref,
    cond_t,
    test_subset=test_subset,
    valid_split=1e-9,
)

In [None]:
# Generator settings for collecting samples of the test partition.
# NOTE(review): this rebinds `params`, which an earlier cell created with
# create_test_params(..., shuffle=False); cells re-run afterwards see this dict.
params = dict(
    dim=feat.shape[1],
    batch_size=1000,
    feature_data=feat.values,
    target_data=targ.values,
    shuffle=True,
    n_frames=par['nFrames'].values,
    n_angles=par['nAngles'].values,
)
# NOTE(review): despite its name the generator is built from partition['test'].
train_batch_generator = DataGenerator_raw(partition['test'], **params)

In [None]:
# Collect the generator output into dense arrays: ttX (features) and tty (targets).
tn = partition['test'].shape[0]

# Batch size and feature width come from the dict the generator was built with,
# instead of repeating the literals 1000 and 96.
tt_batch_size = params['batch_size']
tt_n_batches = tn // tt_batch_size

# Allocate only the rows that whole batches will fill. Sizing the arrays by tn
# would leave all-zero rows at the end whenever tn is not a multiple of the
# batch size, and those rows would enter the statistics computed later.
ttX = np.zeros((tt_n_batches * tt_batch_size, params['dim']))
tty = np.zeros(tt_n_batches * tt_batch_size)
for i in range(tt_n_batches):
    rows = slice(i * tt_batch_size, (i + 1) * tt_batch_size)
    ttX[rows, :], tty[rows] = train_batch_generator[i]
    # progress output every 100 batches
    if i % 100 == 0:
        print(i)

In [None]:
# Build a DataFrame with one column per feature (named after feat's columns)
# followed by the target column 'y'.
cn = feat.columns.tolist()
data = {}
for i,n in enumerate(cn):
    data[n] = ttX[:,i]
# Add the targets once, after all features, so 'y' is the last column rather
# than being re-assigned on every iteration and ending up second.
data['y'] = tty
df = pd.DataFrame(data)
# targets alone, as a single-column frame
df_t = pd.DataFrame({'targets':tty})

In [None]:
import seaborn as sns
%matplotlib
corrmat = df.corr()
top_corr_features = corrmat.index
plt.figure(figsize=(20,20))
#plot heat map
g=sns.heatmap(df[top_corr_features].corr(),annot=False,cmap="RdYlGn")

In [None]:
from pdpbox import pdp

%matplotlib

t_data = df
t_target = df_t['targets']
t_model = m_tt[9]

pdp_feat = []
for i in range(len(cn)):
    pdp_feat.append(pdp.pdp_isolate(model=t_model,dataset=t_data,model_features=cn,feature=cn[i]))
    print('Feature No.:')
    print(i)

In [None]:
from pycebox.ice import ice, ice_plot

# Individual conditional expectation curves for the first feature, 10 grid points.
# Only the feature columns are handed over: df also holds the target column 'y',
# and ice passes every column of the frame it receives to the predict function.
ice_df = ice(df[cn], cn[0], m_tt[7].predict, num_grid_points=10)

In [None]:
# Plot the partial-dependence result of one feature; i selects the feature by
# its position in cn / pdp_feat.
i = 0
fig, axes = pdp.pdp_plot(pdp_isolate_out=pdp_feat[i], feature_name=cn[i])