In [1]:
import pandas as pd
import numpy as np
import random

from statistics import mean, stdev
from scipy.stats import mode

from IPython.display import IFrame, display

import matplotlib.pyplot as plt
import seaborn as sns

# from sklearn import metrics
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay, plot_roc_curve

import warnings
warnings.filterwarnings("ignore")

pd.options.display.max_columns = None

def reset_plt():
    """Start the next plot from a clean slate.

    Closes every open matplotlib figure and re-applies the seaborn "paper"
    context with the font sizes used throughout this notebook. Callers are
    expected to create their own figure afterwards.
    """
    # Closing everything replaces the previous create/clear/close/create
    # sequence, which left an extra empty figure open after every call.
    plt.close('all')

    sns.set_context("paper", rc={"font.size": 16,
                                 "axes.titlesize": 24,
                                 "axes.labelsize": 16})


data_path = '../../data/sim1/'
figure_path = '../../figure/sim1/'

In [2]:
# NOTE(review): nothing in the visible part of this notebook assigns `final_df`,
# yet the following cells read it. With the load below commented out, a fresh
# "Restart Kernel -> Run All" would presumably stop with a NameError - confirm
# where `final_df` is meant to come from and restore the load.
# final_df = pd.read_csv(data_path + "data_3.csv")
# final_df.head()

In [4]:
final_df.columns

In [None]:
final_df.describe()

In [None]:
final_df.Subject.unique()

In [None]:
len(final_df.Subject.unique())

In [None]:
def get_round(num, point=2):
    """Round ``num`` to ``point`` decimal places (2 by default) with the built-in ``round``."""
    rounded = round(num, point)
    return rounded


def get_rounded_str(num, point):
    """Return ``num`` rounded to ``point`` decimal places, converted to a string."""
    rounded = get_round(num, point)
    return str(rounded)


def get_drive_name(drive):
    """Translate a numeric drive code into its two-letter label.

    Codes 2, 3, 4 and 5 map to 'CD', 'MD', 'ED' and 'FD' respectively;
    any other code yields None.
    """
    drive_labels = {2: 'CD', 3: 'MD', 4: 'ED', 5: 'FD'}
    return drive_labels.get(drive)


def get_test_subjs(arousal_signal):
    """Return the fixed list of held-out subject ids for ``arousal_signal``.

    Recognised signals are 'PP', 'PP_2', 'HR', 'BR' and 'PP_HR_BR'; any other
    value yields None. The table is rebuilt on every call, so each caller
    receives its own list object.
    """
    # An earlier 'PP' selection, kept for reference: [2, 31, 66, 47, 44, 25, 24]
    subjects_by_signal = {
        'PP': [18, 23, 16, 25, 8, 45, 2],
        'PP_2': [44, 20, 16, 68, 33, 60, 18],
        'HR': [61, 29, 24, 38, 84, 2, 17],
        'BR': [44, 62, 81, 20, 61, 38, 79],
        'PP_HR_BR': [31, 66, 16, 29, 62, 44, 36],
    }
    return subjects_by_signal.get(arousal_signal)
    




def _class_percentages(df, label_col):
    """Return a one-column frame holding the percentage of rows per ``label_col`` value."""
    counts = df.groupby(label_col).size().to_frame(label_col)
    return 100 * counts / float(counts[label_col].sum())


def get_splitted_data(final_df, model_features, arousal_signal, predict_col, model_train_method, subjects_test=None):
    """Split ``final_df`` into train and test partitions by subject and print a summary.

    Parameters
    ----------
    final_df : DataFrame with a ``Subject`` column, the ``model_features``
        columns and the ``predict_col`` column.
    model_features : list of column names used as model inputs.
    arousal_signal : signal name; only used to look up the fixed test
        subjects when ``model_train_method == 'best_accurate'``.
    predict_col : name of the target column.
    model_train_method : 'best_accurate' (fixed subjects from
        ``get_test_subjs``), 'bootstrap' (7 subjects drawn with
        ``random.sample``) or 'kfold' (subjects given by ``subjects_test``).
    subjects_test : iterable of subject ids; required for 'kfold'.

    Returns
    -------
    (X_train, y_train, X_test, y_test, train_df, test_df). ``y_train`` and
    ``y_test`` are single-column DataFrames. ``train_df`` and ``test_df`` are
    independent copies, so callers can add columns without touching ``final_df``.

    Raises
    ------
    ValueError
        If the method is unknown, if 'kfold' is used without ``subjects_test``,
        or if no fixed test subjects exist for ``arousal_signal``.
    """
    n_bootstrap_test_subjects = 7

    unique_subjs = final_df.Subject.unique()

    if model_train_method == 'best_accurate':
        test_subjs = get_test_subjs(arousal_signal)
        if test_subjs is None:
            raise ValueError("No fixed test subjects defined for arousal signal: " + repr(arousal_signal))
    elif model_train_method == 'bootstrap':
        test_subjs = random.sample(list(unique_subjs), n_bootstrap_test_subjects)
    elif model_train_method == 'kfold':
        if subjects_test is None:
            raise ValueError("subjects_test is required when model_train_method is 'kfold'")
        test_subjs = subjects_test
    else:
        # Previously this fell through and failed later with an unbound-variable error.
        raise ValueError("Unknown model_train_method: " + repr(model_train_method))

    held_out = set(test_subjs)
    train_subjs = [subj for subj in unique_subjs if subj not in held_out]

    # Copies rather than views: the caller later adds prediction columns to test_df.
    train_df = final_df[final_df.Subject.isin(train_subjs)].copy()
    test_df = final_df[final_df.Subject.isin(test_subjs)].copy()

    print('\n\nTest Subjects -->\n', test_subjs)

    print('\n\nTotal Train Subjects: ' + str(len(train_subjs)) + '    Percentage: ' + str(round(100*len(train_subjs)/len(unique_subjs), 2)) + '%')
    print('Total Test Subjects: ' + str(len(test_subjs)) + '    Percentage: ' + str(round(100*len(test_subjs)/len(unique_subjs), 2)) + '%' + '\n\n')

    # Class balance of the target column in each partition.
    print('Train Data Percentage -->')
    print(_class_percentages(train_df, predict_col))

    print('\nTest Data Percentage -->')
    print(_class_percentages(test_df, predict_col))

    print('\n\nTotal Train Rows: ' + str(len(train_df)) + '    Percentage: ' + str(round(100*len(train_df)/len(final_df), 2)) + '%')
    print('Total Test Rows: ' + str(len(test_df)) + '    Percentage: ' + str(round(100*len(test_df)/len(final_df), 2)) + '%' + '\n\n')

    X_train = train_df[model_features]
    y_train = train_df[[predict_col]]

    X_test = test_df[model_features]
    y_test = test_df[[predict_col]]

    return X_train, y_train, X_test, y_test, train_df, test_df


In [None]:
# Working copy of the raw frame with the column names used by the rest of the
# notebook (NASA_* questionnaire prefixes, upper-case HR_/BR_ prefixes).
model_df = final_df.copy().rename(columns=dict([
    ('Gender_Female', 'Gender'),
    # questionnaire sub-scales
    ('Effort', 'NASA_Effort'),
    ('Frustration', 'NASA_Frustration'),
    ('Mental_Demand', 'NASA_Mental'),
    ('Performance', 'NASA_Performance'),
    ('Physical_Demand', 'NASA_Physical'),
    ('Temporal_Demand', 'NASA_Temporal'),
    ('NASA_Total_Sum', 'NASA_Total'),
    # Hr_* columns
    ('Hr_Mean', 'HR_Mean'),
    ('Hr_SD', 'HR_SD'),
    ('Hr_Median', 'HR_Median'),
    ('Hr_SS', 'HR_SS'),
    # Br_* columns
    ('Br_Mean', 'BR_Mean'),
    ('Br_SD', 'BR_SD'),
    ('Br_Median', 'BR_Median'),
    ('Br_SS', 'BR_SS'),
]))


################################################################################################
# model_all_features = [
#     'Age', 'Gender',     
#     'NASA_Mental', 'NASA_Physical', 'NASA_Effort', 'NASA_Frustration', 'NASA_Temporal', 'NASA_Performance', 'NASA_Total',          
#     'Perinasal_Mean', 'Perinasal_Median', 'Perinasal_SD', 'Perinasal_SS',     
#     'HR_Mean', 'HR_Median', 'HR_SD', 'HR_SS',
#     'BR_Mean', 'BR_Median', 'BR_SD', 'BR_SS'
# ]
################################################################################################

# Input columns actually fed to the classifier. Relative to the commented-out
# `model_all_features` list, this leaves out NASA_Mental, NASA_Total and every
# *_Median / *_SS statistic.
model_features = [
    'Age', 'Gender',     
    'NASA_Physical', 'NASA_Effort', 'NASA_Frustration', 'NASA_Temporal', 'NASA_Performance',          
    'Perinasal_Mean', 'Perinasal_SD',     
    'HR_Mean', 'HR_SD',
    'BR_Mean', 'BR_SD'
]

# print(len(model_features), len(plot_features)) ## 22, 21
# print(plot_features)
################################################################################################




################################################################################################
# model_features = [
#     'Age', 'Gender_Female', 'Gender_Male', 
#     'Effort', 'Frustration', 'Mental_Demand', 'Performance', 'Physical_Demand', 'Temporal_Demand', 'NASA_Total_Sum', 
#     'Perinasal_Mean', 'Perinasal_SD', 'Perinasal_Median', 'Perinasal_SS', 
#     'Hr_Mean', 'Hr_SD', 'Hr_Median', 'Hr_SS', 
#     'Br_Mean', 'Br_SD', 'Br_Median', 'Br_SS',


# ################################################################################################
# #              Not Available in any of SIM2, TT1, EmailStress Studies
# ################################################################################################
# #     'STAI', 'Type_AB', 
# #     'NASA_Total_Sum_Normalized', 
# #     'Effort_Normalized', 'Frustration_Normalized', 'Mental_Demand_Normalized',
# #     'Performance_Normalized', 'Physical_Demand_Normalized', 'Temporal_Demand_Normalized', 
# #     'Palm_Mean', 'Palm_SD', 'Palm_Median', 'Palm_SS', 
# #     'Drive_Label_CD', 'Drive_Label_ED', 'Drive_Label_MD', 
# #     'Nasa_Cluster_High', 'Nasa_Cluster_Low',
# ################################################################################################

# ]
################################################################################################






################################################################################################
# Shared classifier instance, re-fitted from scratch on every split by
# get_bootstrap_results. 'sqrt' is what 'auto' meant for classifiers; 'auto'
# itself is deprecated and rejected by newer scikit-learn releases.
model = RandomForestClassifier(n_estimators = 200,
                               max_features = 'sqrt',
                               bootstrap = True)
################################################################################################

In [None]:
def get_confusion_matrix(arousal_signal, model_df, test_df, y_test, y_pred, do_normalize):
    """Draw, save and show one confusion-matrix heatmap.

    Parameters
    ----------
    arousal_signal : signal name; lower-cased to build the output file name.
    model_df : frame whose sorted unique ``Arousal_Mode`` values define the
        row/column order of the matrix.
    test_df : unused here; kept so the signature stays unchanged.
    y_test, y_pred : true and predicted labels.
    do_normalize : if True, cells show the share of all samples in percent
        and the file name gets a '_percentage' suffix; otherwise raw counts.

    The figure is written to ``figure_path`` as both PNG and PDF.
    """
    reset_plt()
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    labels = sorted(model_df.Arousal_Mode.unique())

    # Draw on `ax` explicitly so the styling below always targets the heatmap.
    if do_normalize:
        conf_mat = pd.DataFrame(confusion_matrix(y_test, y_pred, labels=labels, normalize='all'))
        sns.heatmap(conf_mat*100, annot=True, annot_kws={"size": 24}, ax=ax)
        # Only the normalized matrix is expressed in percent.
        ax.set_title('%')
        plot_name = arousal_signal.lower() + '_percentage'
    else:
        conf_mat = pd.DataFrame(confusion_matrix(y_test, y_pred, labels=labels))
        sns.heatmap(conf_mat, annot=True, fmt='d', annot_kws={"size": 24}, ax=ax)
        plot_name = arousal_signal.lower()

    # The heatmap's colorbar hangs off the first collection on the axes.
    ax.collections[0].colorbar.ax.tick_params(labelsize=28)

    ax.set_xticklabels(labels, fontsize=24)
    ax.set_yticklabels(labels, fontsize=24)

    ax.set_xlabel('Predicted', fontsize=32)
    ax.set_ylabel('Actual', fontsize=32)

    fig.savefig(figure_path + plot_name + '_confusion_matrix.png')
    fig.savefig(figure_path + plot_name + '_confusion_matrix.pdf')

    plt.show()
    #####################################################################################
    
    

def get_all_confusion_matrices(arousal_signal, model_df, test_df, y_test, y_pred):
    """Produce the overall confusion matrices plus one count matrix per drive.

    First draws the raw-count and the normalized matrix via
    ``get_confusion_matrix``. Then, for every distinct ``test_df.Drive``
    value, draws a count matrix of ``test_df.Arousal_Mode`` against
    ``test_df.Prediction`` in its own subplot and saves the grid to
    ``figure_path`` as PNG and PDF.

    ``test_df`` must therefore already contain a ``Prediction`` column.
    """
    get_confusion_matrix(arousal_signal, model_df, test_df, y_test, y_pred, do_normalize=False)
    get_confusion_matrix(arousal_signal, model_df, test_df, y_test, y_pred, do_normalize=True)

    labels = sorted(model_df.Arousal_Mode.unique())
    drives = test_df.Drive.unique()
    if len(drives) == 0:
        # Nothing to break down per drive.
        return

    reset_plt()
    # One row per drive actually present (previously fixed at 3, which broke
    # with a fourth drive). Height scales so 3 drives still give 12 x 36.
    fig, axs = plt.subplots(len(drives), figsize=(12, 12 * len(drives)), squeeze=False)
    fig.suptitle(arousal_signal)

    for axis, drive in zip(axs.ravel(), drives):
        drive_test_df = test_df[test_df.Drive == drive]
        y_test_drive = drive_test_df.Arousal_Mode
        y_pred_drive = drive_test_df.Prediction

        conf_mat = pd.DataFrame(confusion_matrix(y_test_drive, y_pred_drive, labels=labels))

        sns.heatmap(conf_mat, annot=True, fmt='d', annot_kws={"size": 24}, ax=axis)
        axis.collections[0].colorbar.ax.tick_params(labelsize=32)

        axis.title.set_text(get_drive_name(drive))
        axis.set_xticklabels(labels, fontsize=24)
        axis.set_yticklabels(labels, fontsize=24)
        axis.set_xlabel('Predicted', fontsize=24)
        axis.set_ylabel('Actual', fontsize=24)

    fig.savefig(figure_path + arousal_signal.lower() + '_drive_confusion_matrix.png')
    fig.savefig(figure_path + arousal_signal.lower() + '_drive_confusion_matrix.pdf')
    
    # plt.show()
    #####################################################################################

    
    
    
def get_feature_importance(model, X_train, arousal_signal):
    """Plot the fitted model's feature importances as a horizontal bar chart.

    Importances come from ``model.feature_importances_``, are labelled with
    ``X_train.columns``, sorted in descending order and rounded to two
    decimals before plotting. The chart is saved to ``figure_path`` as PNG
    and PDF (file name built from the lower-cased ``arousal_signal``) and
    then shown.
    """
    feature_imp = pd.Series(model.feature_importances_,
                            index=X_train.columns).sort_values(ascending=False).round(2)

    reset_plt()
    fig, ax = plt.subplots(figsize=(25, 15))

    sns.barplot(x=feature_imp, y=feature_imp.index, ax=ax)

    ax.set_xlabel('Feature Importance Score', fontsize=36)
    ax.set_ylabel('Features', fontsize=36)

    ax.tick_params(labelsize=24)
    ax.set_yticklabels(ax.get_yticklabels(), fontsize=24, rotation=30)

    fig.savefig(figure_path + arousal_signal.lower() + '_feature_importance.png')
    fig.savefig(figure_path + arousal_signal.lower() + '_feature_importance.pdf')

    plt.show()
    
    
def get_correlation_plot(model_df, arousal_signal, selected_model_features):
    """Plot the correlation matrix of ``Arousal_Mode`` and the selected features.

    Correlations are computed with ``DataFrame.corr`` and rounded to three
    decimals. The annotated heatmap is saved to ``figure_path`` as PNG and
    PDF (file name built from the lower-cased ``arousal_signal``) and shown.
    """
    cor_df = model_df[['Arousal_Mode'] + selected_model_features]
    corr = cor_df.corr().round(3)

    reset_plt()
    fig, ax = plt.subplots(figsize=(40, 40))

    sns.heatmap(corr, cmap="YlGnBu", center=0, square=True, linewidths=.5,
                annot=True, annot_kws={"size": 24}, ax=ax)
    ax.collections[0].colorbar.ax.tick_params(labelsize=32)

    # The x axis keeps its own labels (previously it was fed the y-axis labels).
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=38)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=38)

    fig.savefig(figure_path + arousal_signal.lower() + '_correlation_plot.png')
    fig.savefig(figure_path + arousal_signal.lower() + '_correlation_plot.pdf')

    plt.show()
    
    
    
def get_bootstrap_results(model_df, model_features, arousal_signal, model_train_method, subjects_test=None):
    """Fit and evaluate the shared ``model`` on one subject-wise train/test split.

    Steps:
      1. Copy ``model_df[arousal_signal + '_Arousal_Mode']`` into
         ``model_df['Arousal_Mode']``. This writes to the caller's frame.
      2. Split by subject with ``get_splitted_data``.
      3. Fit the module-level ``model`` and predict on the test partition.
      4. Compute Accuracy, AUC, weighted F1/Recall/Precision and Specificity,
         print them and append them to the module-level ``signal_metrics``
         under ``arousal_signal``.
      5. Save the ROC curve and feature-importance figures to ``figure_path``
         and the per-row predictions to ``data_path``.

    Output file names depend only on ``arousal_signal``, so repeated calls
    for the same signal (several folds or iterations) overwrite the previous
    figures and CSV.

    Relies on the globals ``model``, ``signal_metrics``, ``figure_path`` and
    ``data_path``. Returns None.
    """
    # e.g. 'PP_Arousal_Mode', 'HR_Arousal_Mode', 'BR_Arousal_Mode'
    arousal_col = arousal_signal + '_Arousal_Mode'
    model_df['Arousal_Mode'] = model_df[arousal_col]

    #####################################################################################
    #                               MODELING
    #####################################################################################
    X_train, y_train, X_test, y_test, train_df, test_df = get_splitted_data(model_df,
                                                                            model_features,
                                                                            arousal_signal,
                                                                            'Arousal_Mode',
                                                                            model_train_method,
                                                                            subjects_test)

    # Private copy: prediction columns are added below and must not be
    # written through to a slice of the caller's frame.
    test_df = test_df.copy()

    # y_train arrives as a single-column frame; the estimator expects 1-D labels.
    model.fit(X_train, np.ravel(y_train))
    y_pred = model.predict(X_test)
    # Computed once and reused for both the AUC and the exported probabilities.
    y_pred_probabilities = model.predict_proba(X_test)

    test_df['Prediction'] = y_pred

    Accuracy = get_round(accuracy_score(y_test, y_pred))
    F1 = get_round(f1_score(y_test, y_pred, average='weighted'))
    Recall = get_round(recall_score(y_test, y_pred, average='weighted'))
    Precision = get_round(precision_score(y_test, y_pred, average='weighted'))

    # Ref: https://www.analyticsvidhya.com/blog/2020/06/auc-roc-curve-machine-learning/
    # Column 1 is the probability of model.classes_[1] (binary problem assumed).
    AUC = get_round(roc_auc_score(y_test, y_pred_probabilities[:, 1]))

    # Fixing the label set keeps the matrix 2x2 even when a class is absent
    # from this particular test split, so the four-way unpack stays valid.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=model.classes_).ravel()
    Specificity = get_round(tn / (tn+fp))

    print('Accuracy:' + str(Accuracy))
    print('AUC: ' + str(AUC))
    print('F1: ' + str(F1))
    print('Recall: ' + str(Recall))
    print('Precision: ' + str(Precision))
    print('Specificity: ' + str(Specificity))
    print('\n\n')

    # setdefault lets the function be called before the driver cell has
    # pre-created the per-signal lists.
    recorded = signal_metrics.setdefault(arousal_signal, {})
    for metric_name, metric_value in (('Accuracy', Accuracy),
                                      ('AUC', AUC),
                                      ('F1', F1),
                                      ('Recall', Recall),
                                      ('Precision', Precision),
                                      ('Specificity', Specificity)):
        recorded.setdefault(metric_name, []).append(metric_value)

    #####################################################################################
    #                               ROC curve
    #####################################################################################
    # NOTE(review): plot_roc_curve has been removed from recent scikit-learn
    # releases; RocCurveDisplay.from_estimator is the replacement once the
    # notebook's import line is migrated.
    plot_roc_curve(model, X_test, y_test)

    plt.savefig(figure_path + arousal_signal.lower() + '_roc_curve.png')
    plt.savefig(figure_path + arousal_signal.lower() + '_roc_curve.pdf')

    plt.show()

    #####################################################################################
    #                         Feature Importance
    #####################################################################################
    get_feature_importance(model, X_train, arousal_signal)

    #####################################################################################
    #                              PLOTTING
    #####################################################################################
    # Confusion-matrix plots are currently disabled:
    # get_all_confusion_matrices(arousal_signal, model_df, test_df, y_test, y_pred)
    # get_all_confusion_matrices(arousal_signal, train_df, test_df, y_test, y_pred)

    #####################################################################################
    #                        Classification Probabilities
    #####################################################################################
    # Assumes model.classes_ is ordered (relaxed, stressed) - TODO confirm.
    test_df["Relaxed_Prob"], test_df["Stress_Prob"] = y_pred_probabilities[:,0], y_pred_probabilities[:,1]
    test_df.to_csv(data_path + arousal_signal.lower() + '_pred_result_df.csv', sep=',')
    
    
#     test_df_mean = test_df[['Prediction', 'Relaxed_Prob', 'Stress_Prob']].groupby(['Prediction']).agg({'Relaxed_Prob': 'mean', 'Stress_Prob': 'mean'})
#     test_df_mean = test_df_mean.apply(lambda x: round(100 * x, 2))
#     print(test_df_mean, '\n')
    #####################################################################################

In [None]:
#####################################################################################
# arousal_signals = ['PP']
# Signals to evaluate. Others seen in this notebook: 'PP_HR', 'HR_BR', 'PP_BR', 'PP_HR_BR'.
arousal_signals = ['PP', 'PP_2', 'HR', 'BR']
#####################################################################################


#####################################################################################
model_train_method = 'kfold' ### ['best_accurate', 'kfold', 'bootstrap']

# Seed both generators so a re-run reproduces the same numbers:
# `random` drives the bootstrap subject sampling, and the forest is built
# without a random_state, so it draws from numpy's global generator.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

if model_train_method=='best_accurate':
    _range=1
    random_selection=False

elif model_train_method=='bootstrap':
    _range=30
    random_selection=True

elif model_train_method=='kfold':
    # Folds are formed over subjects, not rows, so a subject never appears
    # in both the train and the test partition.
    subjects = np.array(model_df.Subject.unique())
    kf = KFold(n_splits=5)

else:
    # Fail here instead of with a NameError on `_range` further down.
    raise ValueError("Unknown model_train_method: " + repr(model_train_method))
#####################################################################################



# Filled by get_bootstrap_results: {signal: {metric name: [one value per split]}}
signal_metrics = {}



for arousal_signal in arousal_signals:
    print('----------------------------------------------------')
    print('------------------------', arousal_signal, '------------------------')
    print('----------------------------------------------------\n')

    signal_metrics[arousal_signal] = {
        'Accuracy': [],
        'AUC': [],
        'F1': [],
        'Recall': [],
        'Precision': [],
        'Specificity': [],
    }

    if model_train_method=='kfold':
        for _, test_index in kf.split(subjects):
            get_bootstrap_results(model_df, model_features, arousal_signal, model_train_method, subjects[test_index])

    else:
        for i in range(_range):
            get_bootstrap_results(model_df, model_features, arousal_signal, model_train_method)
    
    
# print(signal_metrics)

In [None]:
print(signal_metrics)


# {'PP': {'Accuracy': [0.96, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95], 'F1': [0.95, 0.95, 0.95, 0.95, 0.95, 0.94, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.94, 0.95, 0.94, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95], 'Recall': [0.96, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95], 'Precision': [0.96, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.96, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95]}, 'PP_2': {'Accuracy': [0.81, 0.8, 0.81, 0.8, 0.81, 0.8, 0.8, 0.81, 0.82, 0.81, 0.82, 0.81, 0.81, 0.82, 0.82, 0.81, 0.8, 0.82, 0.81, 0.81, 0.82, 0.81, 0.82, 0.81, 0.8, 0.82, 0.8, 0.82, 0.8, 0.81], 'F1': [0.78, 0.78, 0.78, 0.77, 0.78, 0.78, 0.77, 0.78, 0.79, 0.78, 0.79, 0.78, 0.78, 0.79, 0.79, 0.79, 0.77, 0.79, 0.78, 0.79, 0.79, 0.78, 0.79, 0.78, 0.77, 0.79, 0.78, 0.79, 0.78, 0.79], 'Recall': [0.81, 0.8, 0.81, 0.8, 0.81, 0.8, 0.8, 0.81, 0.82, 0.81, 0.82, 0.81, 0.81, 0.82, 0.82, 0.81, 0.8, 0.82, 0.81, 0.81, 0.82, 0.81, 0.82, 0.81, 0.8, 0.82, 0.8, 0.82, 0.8, 0.81], 'Precision': [0.8, 0.79, 0.81, 0.78, 0.8, 0.8, 0.79, 0.8, 0.83, 0.79, 0.82, 0.8, 0.8, 0.82, 0.83, 0.8, 0.79, 0.81, 0.8, 0.8, 0.83, 0.8, 0.83, 0.81, 0.79, 0.83, 0.79, 0.81, 0.79, 0.81]}, 'HR': {'Accuracy': [0.73, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71, 0.73, 0.72, 0.72, 0.72, 0.72, 0.71, 0.73, 0.72, 0.72, 0.72, 0.72, 0.71, 0.72, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71], 'F1': [0.73, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71, 0.72, 0.72, 0.72, 0.71, 0.72, 0.71, 0.72, 0.72, 0.72, 0.72, 0.72, 0.71, 0.72, 0.71, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71], 'Recall': [0.73, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71, 0.73, 0.72, 
0.72, 0.72, 0.72, 0.71, 0.73, 0.72, 0.72, 0.72, 0.72, 0.71, 0.72, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71], 'Precision': [0.73, 0.72, 0.73, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71, 0.73, 0.73, 0.72, 0.72, 0.72, 0.71, 0.73, 0.72, 0.72, 0.73, 0.72, 0.72, 0.72, 0.72, 0.72, 0.72, 0.73, 0.72, 0.73, 0.72, 0.71]}, 'BR': {'Accuracy': [0.79, 0.81, 0.8, 0.78, 0.78, 0.79, 0.79, 0.78, 0.79, 0.78, 0.78, 0.79, 0.79, 0.79, 0.78, 0.79, 0.79, 0.78, 0.81, 0.78, 0.79, 0.78, 0.8, 0.79, 0.8, 0.8, 0.78, 0.78, 0.79, 0.79], 'F1': [0.79, 0.81, 0.79, 0.77, 0.78, 0.78, 0.79, 0.77, 0.79, 0.78, 0.78, 0.78, 0.79, 0.78, 0.78, 0.79, 0.79, 0.78, 0.8, 0.78, 0.78, 0.78, 0.79, 0.79, 0.8, 0.79, 0.78, 0.78, 0.79, 0.79], 'Recall': [0.79, 0.81, 0.8, 0.78, 0.78, 0.79, 0.79, 0.78, 0.79, 0.78, 0.78, 0.79, 0.79, 0.79, 0.78, 0.79, 0.79, 0.78, 0.81, 0.78, 0.79, 0.78, 0.8, 0.79, 0.8, 0.8, 0.78, 0.78, 0.79, 0.79], 'Precision': [0.8, 0.82, 0.81, 0.78, 0.79, 0.79, 0.8, 0.78, 0.8, 0.79, 0.79, 0.8, 0.8, 0.79, 0.8, 0.8, 0.8, 0.79, 0.81, 0.8, 0.79, 0.79, 0.81, 0.8, 0.81, 0.8, 0.79, 0.79, 0.8, 0.79]}, 'PP_HR_BR': {'Accuracy': [0.76, 0.75, 0.76, 0.75, 0.75, 0.75, 0.75, 0.73, 0.76, 0.76, 0.76, 0.75, 0.75, 0.76, 0.76, 0.76, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.76, 0.76, 0.76], 'F1': [0.74, 0.74, 0.75, 0.74, 0.73, 0.74, 0.73, 0.72, 0.74, 0.74, 0.75, 0.74, 0.74, 0.75, 0.75, 0.74, 0.75, 0.74, 0.74, 0.73, 0.75, 0.74, 0.75, 0.74, 0.74, 0.74, 0.74, 0.74, 0.75, 0.75], 'Recall': [0.76, 0.75, 0.76, 0.75, 0.75, 0.75, 0.75, 0.73, 0.76, 0.76, 0.76, 0.75, 0.75, 0.76, 0.76, 0.76, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.75, 0.76, 0.76, 0.76, 0.76], 'Precision': [0.74, 0.74, 0.75, 0.74, 0.73, 0.74, 0.73, 0.72, 0.74, 0.75, 0.75, 0.74, 0.74, 0.75, 0.75, 0.75, 0.75, 0.74, 0.74, 0.73, 0.75, 0.74, 0.75, 0.74, 0.74, 0.74, 0.74, 0.74, 0.75, 0.75]}}

In [None]:
# print(signal_metrics['PP']['Accuracy'])
# print(signal_metrics['HR']['Accuracy'])
# print(len(signal_metrics['PP']['Accuracy']))
# print(len(signal_metrics['HR']['Accuracy']))

In [None]:
# Per-signal summary of the collected metrics:
#   best_accurate -> the single recorded value
#   bootstrap     -> mean ± standard deviation over the iterations
#   kfold         -> mean over the folds
for arousal_signal in arousal_signals:
    per_signal = signal_metrics[arousal_signal]
    print("\n\n" + arousal_signal + " ----> ")

    for metric in ('Accuracy', 'AUC', 'F1', 'Recall', 'Precision', 'Specificity'):
        values = per_signal[metric]

        if model_train_method == 'best_accurate':
            summary = get_rounded_str(values[0], 2)
        elif model_train_method == 'bootstrap':
            average_part = get_rounded_str(mean(values), 2)
            spread_part = get_rounded_str(stdev(values), 3)
            summary = average_part + u" \u00B1 " + spread_part
        elif model_train_method == 'kfold':
            summary = get_rounded_str(mean(values), 2)
        else:
            # Unknown method: nothing is printed for this metric.
            continue

        print(metric + ": " + summary)

In [None]:
# import pprint
# pp = pprint.PrettyPrinter(indent=1)

# pp.pprint(signal_metrics)

In [None]:
# # print(model_features)

# plot_df = model_df.copy().rename(columns={
#                              'Gender_Female': 'Gender', 
#                              'Effort': 'NASA_Effort',
#                              'Frustration': 'NASA_Frustration', 
#                              'Mental_Demand': 'NASA_Mental',
#                              'Performance': 'NASA_Performance', 
#                              'Physical_Demand': 'NASA_Physical',
#                              'Temporal_Demand': 'NASA_Temporal', 
#                              'NASA_Total_Sum': 'NASA_Total',
#                              'Hr_Mean': 'HR_Mean',
#                              'Hr_SD': 'HR_SD',
#                              'Hr_Median': 'HR_Median',
#                              'Hr_SS': 'HR_SS',
#                              'Br_Mean': 'BR_Mean',
#                              'Br_SD': 'BR_SD',
#                              'Br_Median': 'BR_Median',
#                              'Br_SS': 'BR_SS'
#                             })


# # print(plot_df.columns)

# plot_features = ['Age',
#                  'Gender',
                 
#                  'NASA_Mental',
#                  'NASA_Physical',
#                  'NASA_Effort',
#                  'NASA_Frustration',
#                  'NASA_Temporal',
#                  'NASA_Performance', 
#                  'NASA_Total',
                 
#                  'Perinasal_Mean', 
#                  'Perinasal_Median', 
#                  'Perinasal_SD',
#                  'Perinasal_SS', 
                 
#                  'HR_Mean', 
#                  'HR_Median', 
#                  'HR_SD', 
#                  'HR_SS', 
                 
#                  'BR_Mean',
#                  'BR_Median', 
#                  'BR_SD', 
#                  'BR_SS']

# # print(len(model_features), len(plot_features)) ## 22, 21
# # print(plot_features)

In [None]:
#################
# DO NOT DELETE #
#################



##################################
####----   Old Features   ----####
##################################
# cor_df = model_df.copy()[['Arousal_Mode',
#                           'Perinasal_Mean', 'Perinasal_SD', 'Perinasal_Median', 'Perinasal_SS', 
#                           'Palm_Mean', 'Palm_SD', 'Palm_Median', 'Palm_SS', 
#                           'Hr_Mean', 'Hr_SD', 'Hr_Median', 'Hr_SS', 
#                           'Br_Mean', 'Br_SD', 'Br_Median', 'Br_SS',
#                           'Drive_Label_CD', 'Drive_Label_ED', 
#                           'Drive_Label_FD', 'Drive_Label_MD'
#                            ]]


#############################################
####---- only physiological Features ----####
#############################################
# cor_df = model_df.copy()[['Arousal_Mode', 
#                           'Perinasal_Mean', 'Perinasal_SD',
#                           'Palm_Mean', 'Palm_SD', 
#                           'Hr_Mean', 'Hr_SD',
#                           'Br_Mean', 'Br_SD',
#                           'Drive_Label_CD', 'Drive_Label_ED', 
#                           'Drive_Label_FD', 'Drive_Label_MD'
#                            ]]




# ###################################
# ####----   Plot Features   ----####
# ###################################
# cor_df = plot_df.copy()[['Arousal_Mode'] + plot_features]
# corr = cor_df.corr().round(2)








#################################
###----   All Features   ----####
#################################
# Correlation matrix of the target column and every model feature, rounded to 2 decimals.
# NOTE(review): get_bootstrap_results overwrites model_df['Arousal_Mode'] in place on
# every call, so when this cell runs after the training loop the column holds the
# labels of the last signal processed - confirm that is the intended target here.
cor_df = model_df.copy()[['Arousal_Mode'] + model_features]
corr = cor_df.corr().round(2)








##################################################################
# Fresh 40x40 figure with a single axes; the heatmap below draws on the current axes.
reset_plt()
plt.figure(figsize=(40, 40))
plt.subplot(1, 1, 1)

sns_plt = sns.heatmap(corr, cmap="YlGnBu", center=0, square=True, linewidths=.5, annot=True, annot_kws={"size": 24})
# The colorbar hangs off the first collection on the heatmap axes.
sns_plt.collections[0].colorbar.ax.tick_params(labelsize=32)

# Re-apply the existing tick labels with larger fonts; x labels are tilted and right-aligned.
sns_plt.set_xticklabels(sns_plt.get_xticklabels(), rotation = 45, fontsize = 34, ha='right')  # 45
sns_plt.set_yticklabels(sns_plt.get_yticklabels(), rotation = 0, fontsize = 34)


# sns_plt.axes.set_title("Title",fontsize=50)
# sns_plt.set_xlabel("X Label",fontsize=30)
# sns_plt.set_ylabel("Y Label",fontsize=20)
# sns_plt.tick_params(labelsize=5)
# sns_plt.plt.show()


# Saved in both raster and vector form.
plt.savefig(figure_path + 'all_features_correlation_plot.png')
plt.savefig(figure_path + 'all_features_correlation_plot.pdf')

plt.show()

In [None]:
##########################
# UNUSED CODE
##########################


# IFrame("../../data/sim1/figures/pp_arousal_prediction_Cognitive_sd.pdf", width=900, height=600)



# result_df = pd.DataFrame({
#         'actual': y_test,
#         'prediction': y_pred,
#         'err': y_test == y_pred
#     }).sort_values('err', ascending = False)
# result_df.to_csv("../../data/sim1/test/result_" + arousal_signal + ".csv", sep=',')










# ########################
# PP -->
# ########################
# Accuracy: 0.779839208410637
# F1: 0.7578831147346705
# Recall: 0.779839208410637
# Precision: 0.7987647247206715

# #################################
# PP --> Best Bootstrapping for PP
# #################################
# Accuracy: 0.8556223970384081
# F1: 0.8416228891876154
# Recall: 0.8556223970384081
# Precision: 0.8378525827410469






#########################
# PP_2 -->
#########################
# Accuracy: 0.7414965986394558
# F1: 0.7382594058202663
# Recall: 0.7414965986394558
# Precision: 0.7647435765692131

##################################
# PP_2 --> Best Bootstrapping for PP
##################################
# Accuracy: 0.7431744562702453
# F1: 0.7332696262099785
# Recall: 0.7431744562702453
# Precision: 0.736553172654085


    
    
    
    
#########################
# HR -->
#########################
# Accuracy: 0.6487322201607916
# F1: 0.5998199331532664
# Recall: 0.6487322201607916
# Precision: 0.6352522628886266

##################################
# HR --> Best Bootstrapping for PP
##################################
# Accuracy: 0.6598796853308654
# F1: 0.659423920692846
# Recall: 0.6598796853308654
# Precision: 0.6590498579113212




#########################
# BR -->
#########################
# Accuracy: 0.6951144094001237
# F1: 0.6916477146798585
# Recall: 0.6951144094001237
# Precision: 0.7030998641785814

##################################
# BR --> Best Bootstrapping for PP
##################################
# Accuracy: 0.7366959740860712
# F1: 0.7339665422228959
# Recall: 0.7366959740860712
# Precision: 0.7382492602016391


In [None]:
# ################################################################################################
# # Ref: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
# ################################################################################################
# X = np.array(model_df.Subject.unique())
# kf = KFold(n_splits=5)
# kf.get_n_splits(X)
# print(kf)
# KFold(n_splits=5, random_state=None, shuffle=False)

# for train_index, test_index in kf.split(X):
#     X_train, X_test = X[train_index], X[test_index]
# #     print("TRAIN:", X[train_index], "TEST:", X[test_index])
#     print("TRAIN_IDX:", train_index, "TEST_IDX:", test_index)