# Common Libraries

In [2]:
import os
import pandas as pd
import numpy as np

import random
import itertools
import pprint
import copy
import json 
import collections
import itertools

from collections import defaultdict
from statistics import mean, stdev

import scipy.stats as stats
from scipy.stats import mode

from datetime import datetime
import time

from IPython.display import IFrame, display
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, plot_roc_curve
from sklearn.neighbors import KNeighborsClassifier


import keras
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Flatten, Dropout
# from keras.utils import to_categorical
# from tensorflow.keras.utils import to_categorical


import tensorflow as tf
import tensorflow.keras as tf_k
import tensorflow_addons as tf_a
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Conv1D, MaxPooling1D, Flatten, Bidirectional, Input, Flatten, Activation, Reshape, RepeatVector, Concatenate, ConvLSTM1D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2


from keras import backend as K


import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None





# Common Variables
## Remember: do not put any variable here that may change

In [3]:
# Names of the metrics tracked for every model; each starts with its own empty list.
all_metrics = {
    metric_name: []
    for metric_name in ('Accuracy', 'AUC', 'F1', 'Recall', 'Precision', 'Specificity')
}


# Empty results table: identifying columns first, then one Train_/Test_ column
# pair per metric, in the order the metrics are declared above.
metrics_df = pd.DataFrame({
    column: [] for column in ('Model', 'Arousal_Signal', 'Train_Study', 'Test_Study')
})

for metric in all_metrics:
    metrics_df['Train_' + metric] = []
    metrics_df['Test_' + metric] = []
        



        
        
        
# Feature columns selected as model inputs: get_trained_model() and
# test_model() index their data frames with this list (df[model_features]).
model_features = [
    'Age', 'Gender',     
    'NASA_Physical', 'NASA_Effort', 'NASA_Frustration', 'NASA_Temporal', 'NASA_Performance',          
    'PP_Mean', 'PP_SD',     
    'HR_Mean', 'HR_SD',
    'BR_Mean', 'BR_SD'
]




####################################################################################
# Evaluation strategy read by get_trained_model() and print_metrics().
# Only the 'kfold' branch is active in get_trained_model(); the other two
# options survive only in commented-out code.
model_train_method = 'kfold' ### ['best_accurate', 'kfold', 'bootstrap']

# if model_train_method=='best_accurate':
#     _range=1
#     random_selection=False
    
# elif model_train_method=='bootstrap':
#     _range=30
#     random_selection=True
    
# elif model_train_method=='kfold':
#     kf = KFold(n_splits=k_fold_n_splits)
####################################################################################



# NOTE(review): not referenced in the visible part of this file; presumably the
# probability cut-off for turning scores into class labels -- confirm.
prediction_threshold = 0.5


# NOTE(review): not referenced in the visible part of this file; presumably the
# lower/upper ECDF quantiles used to label non-arousal / arousal -- confirm.
non_arousal_threshold_ecdf = 0.33
arousal_threshold_ecdf = 0.67
    
    
# Placeholders initialised to None; nothing in the visible code assigns them.
dnn_model_name = None

running_study = None

In [4]:
# Study identifiers; the same strings are used as sub-directory names below
# and as keys/parts of the '___'-joined study combination names.
sim1 = 'sim1'
sim2 = 'sim2'
tt1 = 'tt1'
office_tasks = 'office_tasks'
deadline_study = 'deadline_study'
all_studies = 'all_studies'


# Root locations, written relative to the working directory; the two roots end
# with '/' because add_path() concatenates without inserting a separator.
data_dir = '../../data/'
fig_dir = '../../figure/'
# NOTE(review): the two names below are not used in the visible part of the file.
metrics_dir = 'metrics'
models_dir = 'models'


def add_path(path1, path2):
    """Return ``path1`` followed by ``path2`` and a trailing '/'.

    No separator is inserted between the two parts, so ``path1`` must already
    end with one.
    """
    return ''.join((path1, path2, '/'))




# Per-study data directories, each built as data_dir + <study name> + '/'.
# The per-study figure directories are disabled; only the all-studies figure
# directory is created.
sim1_data_dir = add_path(data_dir, sim1)
# sim1_fig_dir = add_path(fig_dir, sim1)

sim2_data_dir = add_path(data_dir, sim2)
# sim2_fig_dir = add_path(fig_dir, sim2)

tt1_data_dir = add_path(data_dir, tt1)
# tt1_fig_dir = add_path(fig_dir, tt1)

office_tasks_data_dir = add_path(data_dir, office_tasks)
# office_tasks_fig_dir = add_path(fig_dir, office_tasks)

deadline_study_data_dir = add_path(data_dir, deadline_study)
# deadline_study_fig_dir = add_path(fig_dir, deadline_study)

all_studies_data_dir = add_path(data_dir, all_studies)
all_studies_fig_dir = add_path(fig_dir, all_studies)



In [5]:
# Named groups of studies: each key is the member names joined by '___' and
# each value lists the member studies in the same order.
study_ecdf_combinations = dict([
    ('sim1___sim2', ['sim1', 'sim2']),
    ('sim1___sim2___tt1', ['sim1', 'sim2', 'tt1']),
    ('sim1___sim2___office_tasks', ['sim1', 'sim2', 'office_tasks']),
    ('sim1___sim2___tt1___office_tasks', ['sim1', 'sim2', 'tt1', 'office_tasks']),
])

In [None]:
def get_study_combinations(studies):
    """Enumerate every train/test partition of ``studies``.

    Returns a pair ``(study_combinations, all_study_combinations)``:

    * ``study_combinations`` maps the '___'-joined names of each non-empty
      subset of ``studies`` to ``{'Train': <subset>, 'Test': <the rest>}``;
      subsets are generated smallest first.
    * ``all_study_combinations`` is ``studies`` followed by the keys of the
      module-level ``study_ecdf_combinations``.
    """
    study_combinations = {
        '___'.join(train_subset): {
            'Train': list(train_subset),
            'Test': [name for name in studies if name not in train_subset],
        }
        for subset_size in range(1, len(studies) + 1)
        for train_subset in itertools.combinations(studies, subset_size)
    }

    all_study_combinations = studies + [*study_ecdf_combinations]

    return study_combinations, all_study_combinations

In [8]:
### studies = [sim1, sim2, tt1, office_tasks, deadline_study]
# `studies` itself excludes deadline_study ...
studies = [sim1, sim2, tt1, office_tasks]

# ... but the combinations are generated with deadline_study appended.
study_combinations, all_study_combinations = get_study_combinations(studies+[deadline_study])
# Bare expression: displays the list when run as a notebook cell.
all_study_combinations

# Total Train Study: 1
# ['sim1'] ['sim2', 'tt1', 'office_tasks'] sim1
# ['sim2'] ['sim1', 'tt1', 'office_tasks'] sim2
# ['tt1'] ['sim1', 'sim2', 'office_tasks'] tt1
# ['office_tasks'] ['sim1', 'sim2', 'tt1'] office_tasks

# Total Train Study: 2
# ['sim1', 'sim2'] ['tt1', 'office_tasks'] sim1___sim2
# ['sim1', 'tt1'] ['sim2', 'office_tasks'] sim1___tt1
# ['sim1', 'office_tasks'] ['sim2', 'tt1'] sim1___office_tasks
# ['sim2', 'tt1'] ['sim1', 'office_tasks'] sim2___tt1
# ['sim2', 'office_tasks'] ['sim1', 'tt1'] sim2___office_tasks
# ['tt1', 'office_tasks'] ['sim1', 'sim2'] tt1___office_tasks

# Total Train Study: 3
# ['sim1', 'sim2', 'tt1'] ['office_tasks'] sim1___sim2___tt1
# ['sim1', 'sim2', 'office_tasks'] ['tt1'] sim1___sim2___office_tasks
# ['sim1', 'tt1', 'office_tasks'] ['sim2'] sim1___tt1___office_tasks
# ['sim2', 'tt1', 'office_tasks'] ['sim1'] sim2___tt1___office_tasks

# Total Train Study: 4
# ['sim1', 'sim2', 'tt1', 'office_tasks'] [] sim1___sim2___tt1___office_tasks





# dict_keys(['sim1', 
#            'sim2', 
#            'tt1', 
#            'office_tasks', 
#            'sim1___sim2', 
#            'sim1___tt1', 
#            'sim1___office_tasks', 
#            'sim2___tt1', 
#            'sim2___office_tasks', 
#            'tt1___office_tasks', 
#            'sim1___sim2___tt1', 
#            'sim1___sim2___office_tasks', 
#            'sim1___tt1___office_tasks', 
#            'sim2___tt1___office_tasks', 
#            'sim1___sim2___tt1___office_tasks'])






# {'office_tasks': {'Test': ['sim1', 'sim2', 'tt1'], 'Train': ['office_tasks']},
#  'sim1': {'Test': ['sim2', 'tt1', 'office_tasks'], 'Train': ['sim1']},
#  'sim1___office_tasks': {'Test': ['sim2', 'tt1'],
#                          'Train': ['sim1', 'office_tasks']},
#  'sim1___sim2': {'Test': ['tt1', 'office_tasks'], 'Train': ['sim1', 'sim2']},
#  'sim1___sim2___office_tasks': {'Test': ['tt1'],
#                                 'Train': ['sim1', 'sim2', 'office_tasks']},
#  'sim1___sim2___tt1': {'Test': ['office_tasks'],
#                        'Train': ['sim1', 'sim2', 'tt1']},
#  'sim1___sim2___tt1___office_tasks': {'Test': [],
#                                       'Train': ['sim1',
#                                                 'sim2',
#                                                 'tt1',
#                                                 'office_tasks']},
#  'sim1___tt1': {'Test': ['sim2', 'office_tasks'], 'Train': ['sim1', 'tt1']},
#  'sim1___tt1___office_tasks': {'Test': ['sim2'],
#                                'Train': ['sim1', 'tt1', 'office_tasks']},
#  'sim2': {'Test': ['sim1', 'tt1', 'office_tasks'], 'Train': ['sim2']},
#  'sim2___office_tasks': {'Test': ['sim1', 'tt1'],
#                          'Train': ['sim2', 'office_tasks']},
#  'sim2___tt1': {'Test': ['sim1', 'office_tasks'], 'Train': ['sim2', 'tt1']},
#  'sim2___tt1___office_tasks': {'Test': ['sim1'],
#                                'Train': ['sim2', 'tt1', 'office_tasks']},
#  'tt1': {'Test': ['sim1', 'sim2', 'office_tasks'], 'Train': ['tt1']},
#  'tt1___office_tasks': {'Test': ['sim1', 'sim2'],
#                         'Train': ['tt1', 'office_tasks']}}



['sim1',
 'sim2',
 'tt1',
 'office_tasks',
 'deadline_study',
 'sim1___sim2',
 'sim1___sim2___tt1',
 'sim1___sim2___office_tasks',
 'sim1___sim2___tt1___office_tasks']

In [35]:
def rename_cols(df):
    """Return a copy of ``df`` with known column aliases mapped to canonical names.

    Columns that are not listed below keep their names; ``df`` itself is not
    modified.
    """
    demographic_and_nasa = {
        'Gender_Female': 'Gender',
        'Effort': 'NASA_Effort',
        'Frustration': 'NASA_Frustration',
        'Performance': 'NASA_Performance',
        # underscore-separated spellings
        'Mental_Demand': 'NASA_Mental',
        'Physical_Demand': 'NASA_Physical',
        'Temporal_Demand': 'NASA_Temporal',
        'NASA_Total_Sum': 'NASA_Total',
        # space-separated spellings
        'Mental Demand': 'NASA_Mental',
        'Physical Demand': 'NASA_Physical',
        'Temporal Demand': 'NASA_Temporal',
        'NASA Total Sum': 'NASA_Total',
    }
    perinasal = {
        'Perinasal_Mean': 'PP_Mean',
        'Perinasal_SD': 'PP_SD',
        'Perinasal_Median': 'PP_Median',
        'Perinasal_SS': 'PP_SS',
    }
    heart_rate = {
        'Hr_Mean': 'HR_Mean',
        'Hr_SD': 'HR_SD',
        'Hr_Median': 'HR_Median',
        'Hr_SS': 'HR_SS',
        'Heart_Mean': 'HR_Mean',
        'Heart_SD': 'HR_SD',
        'Heart_Median': 'HR_Median',
        'Heart_SS': 'HR_SS',
    }
    breathing_rate = {
        'Br_Mean': 'BR_Mean',
        'Br_SD': 'BR_SD',
        'Br_Median': 'BR_Median',
        'Br_SS': 'BR_SS',
        'Breathing_Mean': 'BR_Mean',
        'Breathing_SD': 'BR_SD',
        'Breathing_Median': 'BR_Median',
        'Breathing_SS': 'BR_SS',
    }

    column_aliases = {**demographic_and_nasa, **perinasal, **heart_rate, **breathing_rate}
    renamed = df.copy()
    return renamed.rename(columns=column_aliases)



def change_ground_truth_labels(ground_truth_label):
    """Translate 'relaxed'/'stressed' into 'non-arousal'/'arousal'.

    Any other value is returned unchanged.
    """
    if ground_truth_label == 'relaxed':
        return 'non-arousal'
    if ground_truth_label == 'stressed':
        return 'arousal'
    return ground_truth_label




def get_model(model_name='Random_Forest'):
    """Build a fresh, unfitted classifier for ``model_name``.

    Parameters
    ----------
    model_name : str
        'Random_Forest' (default) or 'KNN'.

    Returns
    -------
    A new scikit-learn estimator instance.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names (previously this
        surfaced as an UnboundLocalError on the return statement).
    """
    # Code for extension: add further model names here.
    if model_name == 'Random_Forest':
        return RandomForestClassifier(
            n_estimators=200,
            # 'sqrt' is what 'auto' meant for classifiers; 'auto' itself is
            # deprecated and rejected by newer scikit-learn releases.
            max_features='sqrt',
            bootstrap=True)
    if model_name == 'KNN':
        return KNeighborsClassifier(n_neighbors=3)

    raise ValueError(
        "Unknown model_name %r; expected 'Random_Forest' or 'KNN'" % (model_name,))

In [36]:
def get_date_time():
    """Return the current local time as 'MM_DD_YYYY_HH_MM_SS'."""
    timestamp_format = '%m_%d_%Y_%H_%M_%S'
    current_time = datetime.now()
    return current_time.strftime(timestamp_format)


# Number of decimals applied by both rounding helpers below.
metrics_point = 6


def get_round(num, point=2):
    """Round ``num`` to ``metrics_point`` decimals.

    NOTE: ``point`` is accepted for call compatibility but is not used; the
    module-level ``metrics_point`` always decides the precision.
    """
    decimals = metrics_point
    return round(num, decimals)


def get_rounded_str(num, point):
    """Return ``num`` rounded to ``metrics_point`` decimals, as a string.

    NOTE: ``point`` is not used here either.
    """
    rounded = round(num, metrics_point)
    return str(rounded)


# Sum of the squared elements of ``x``: squares ``x`` with ``**`` and adds the
# results with the builtin ``sum``, so ``x`` must support ``** 2`` and be iterable.
sum_of_squares = lambda x: sum(x**2)


def concat_df(root_path, files, final_file_name):
    """Stack several CSV files row-wise and write the result as one CSV.

    Parameters
    ----------
    root_path : str
        Directory prefix (including its trailing separator); it is prepended
        to every entry of ``files`` and to ``final_file_name``.
    files : iterable of str
        CSV file names, read in the given order.
    final_file_name : str
        Name of the combined CSV written under ``root_path``.

    The original row index of every input file is kept and written as the
    first column of the output. With an empty ``files`` an empty frame is
    written.
    """
    # Read everything first and concatenate once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration.
    frames = [pd.read_csv(root_path + file_name) for file_name in files]

    # pd.concat() rejects an empty list, so keep the "no input" case explicit.
    final_df = pd.concat(frames) if frames else pd.DataFrame()

    final_df.to_csv(root_path + final_file_name)

    
def print_row_count(df, col_name):
    """Print the number of rows for each distinct value of ``col_name``."""
    grouped = df.groupby([col_name])
    counts = grouped.agg({col_name: 'count'})
    print(counts, '\n')

def print_percentage(df, col_name):
    """Print the share (in percent, 2 decimals) of rows per value of ``col_name``."""
    counts = df.groupby([col_name]).agg({col_name: 'count'})

    def as_percent(column):
        # Each count relative to the column total, scaled to 0-100.
        return round(100 * column / float(column.sum()), 2)

    shares = counts.apply(as_percent)
    print(shares, '\n')


def get_study_subject_name(df):
    """Add a ``Study_Subject`` column ('<Study_Name>_<Subject>') to ``df``.

    The column is written into ``df`` in place and the same frame is returned.
    """
    study_prefix = df.Study_Name + '_'
    subject_text = df.Subject.map(str)
    df['Study_Subject'] = study_prefix + subject_text
    return df

def get_splitted_data(final_df, model_features, arousal_signal, predict_col, model_train_method, study_subjects_test=None):
    """Split ``final_df`` into train and test parts by ``Study_Subject``.

    Rows whose ``Study_Subject`` is listed in ``study_subjects_test`` form the
    test set; all remaining rows form the training set.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Must contain ``Study_Subject``, the ``model_features`` columns and
        ``predict_col``.
    model_features : list of str
        Columns returned as the feature matrices.
    arousal_signal : str
        Not used by the split; kept so existing callers keep working.
    predict_col : str
        Name of the target column.
    model_train_method : str
        Only 'kfold' is supported.
    study_subjects_test : sequence of str
        ``Study_Subject`` values that make up the test set.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, train_df, test_df)``; the ``y``
        parts are single-column DataFrames.

    Raises
    ------
    ValueError
        For an unsupported ``model_train_method`` or a missing
        ``study_subjects_test`` (previously an UnboundLocalError / TypeError
        raised further down).
    """
    # The 'best_accurate' and 'bootstrap' strategies were never implemented
    # here; fail with a clear message instead of an unbound local variable.
    if model_train_method != 'kfold':
        raise ValueError(
            "Unsupported model_train_method %r; only 'kfold' is implemented"
            % (model_train_method,))
    if study_subjects_test is None:
        raise ValueError("study_subjects_test is required for the 'kfold' method")

    # One boolean mask decides both sides, so every row lands in exactly one
    # of the two parts.
    is_test_row = final_df.Study_Subject.isin(study_subjects_test)
    train_df = final_df[~is_test_row]
    test_df = final_df[is_test_row]

    X_train = train_df[model_features]
    y_train = train_df[[predict_col]]

    X_test = test_df[model_features]
    y_test = test_df[[predict_col]]

    return X_train, y_train, X_test, y_test, train_df, test_df


In [9]:
def get_confusion_matrix(arousal_signal, y_test, y_pred, do_normalize=True, labels=None, figure_dir=None):
    """Plot, save and show a confusion-matrix heatmap.

    Parameters
    ----------
    arousal_signal : str
        Used (lower-cased) as the stem of the saved file names.
    y_test, y_pred : array-like
        True and predicted class labels.
    do_normalize : bool
        True plots percentages of all samples; False plots raw counts.
    labels : list, optional
        Class labels in plotting order. Defaults to the sorted labels seen in
        ``y_test`` and ``y_pred`` (the function used to read an undefined
        ``train_df`` for this).
    figure_dir : str, optional
        Directory prefix for the saved figures. Defaults to the module-level
        ``figure_path``.
        NOTE(review): ``figure_path`` is not defined in the visible part of
        this file -- confirm it exists or pass ``figure_dir`` explicitly.
    """
    if labels is None:
        labels = sorted(set(np.ravel(y_test)) | set(np.ravel(y_pred)))
    if figure_dir is None:
        figure_dir = figure_path

    reset_plt()
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    if do_normalize:
        conf_mat = pd.DataFrame(confusion_matrix(y_test, y_pred, labels=labels, normalize='all'))
        sns.heatmap(conf_mat*100, annot=True, annot_kws={'size': 24})
        plot_name = arousal_signal.lower() + '_percentage'
    else:
        conf_mat = pd.DataFrame(confusion_matrix(y_test, y_pred, labels=labels))
        sns.heatmap(conf_mat, annot=True, fmt='d', annot_kws={'size': 24})
        plot_name = arousal_signal.lower()

    ax.collections[0].colorbar.ax.tick_params(labelsize=28)

    # The '%' title is only correct for the normalised variant.
    ax.set_title('%' if do_normalize else 'Count')

    ax.set_xticklabels(labels, fontsize=24)
    ax.set_yticklabels(labels, fontsize=24)

    ax.set_xlabel('Predicted', fontsize=32)
    ax.set_ylabel('Actual', fontsize=32)

    plt.savefig(figure_dir + plot_name + '_confusion_matrix.png')
    plt.savefig(figure_dir + plot_name + '_confusion_matrix.pdf')

    plt.show()
    #####################################################################################
    
    

def get_all_confusion_matrices(arousal_signal, train_df, test_df, y_test, y_pred):
    """Plot the overall confusion matrices plus one count matrix per treatment.

    Parameters
    ----------
    arousal_signal : str
        Used as the figure title and passed on to get_confusion_matrix().
    train_df : pandas.DataFrame
        Its ``Arousal_Mode`` column defines the class labels and their order.
    test_df : pandas.DataFrame
        Must contain ``Treatment``, ``Arousal_Mode`` and ``Prediction``
        columns; the caller has to store the predictions in ``Prediction``
        before calling this function.
    y_test, y_pred : array-like
        True and predicted labels for the overall matrices.
    """
    # get_confusion_matrix() takes (arousal_signal, y_test, y_pred, ...); the
    # data frames must not be passed positionally.
    get_confusion_matrix(arousal_signal, y_test, y_pred, do_normalize=True)
    get_confusion_matrix(arousal_signal, y_test, y_pred, do_normalize=False)

    labels = sorted(train_df.Arousal_Mode.unique())
    treatments = test_df.Treatment.unique()

    # One subplot row per treatment (12 x 12 inches each, i.e. 12 x 36 for
    # three treatments); squeeze=False keeps ``axs`` two-dimensional even for
    # a single row.
    n_rows = max(1, len(treatments))
    reset_plt()
    fig, axs = plt.subplots(n_rows, figsize=(12, 12 * n_rows), squeeze=False)
    fig.suptitle(arousal_signal)

    for i, treatment in enumerate(treatments):
        treatment_test_df = test_df[test_df.Treatment == treatment].copy()
        y_test_treatment = treatment_test_df.Arousal_Mode
        y_pred_treatment = treatment_test_df.Prediction

        conf_mat = pd.DataFrame(confusion_matrix(y_test_treatment, y_pred_treatment, labels=labels))
        axis = axs[i, 0]

        sns.heatmap(conf_mat, annot=True, fmt='d', annot_kws={'size': 24}, ax=axis)
        axis.collections[0].colorbar.ax.tick_params(labelsize=32)

        axis.title.set_text(get_treatment_name(treatment))
        axis.set_xticklabels(labels, fontsize=24)
        axis.set_yticklabels(labels, fontsize=24)
        axis.set_xlabel('Predicted', fontsize=24)
        axis.set_ylabel('Actual', fontsize=24)

    # Saving and showing the per-treatment figure is currently disabled:
    #     plt.savefig(figure_path + arousal_signal.lower() + '_treatment_confusion_matrix.png')
    #     plt.savefig(figure_path + arousal_signal.lower() + '_treatment_confusion_matrix.pdf')
    #     plt.show()
    #####################################################################################

    
    
    
def get_feature_importance(model, X_train):
    """Plot, save and show the feature importances of a fitted model.

    Parameters
    ----------
    model : fitted estimator exposing ``feature_importances_``.
    X_train : pandas.DataFrame
        Its column names label the bars.

    The figure is written as PNG and PDF under ``all_studies_fig_dir``.

    NOTE(review): the file name is built from ``study_combination`` and
    ``arousal_signal``, which are not parameters; they are read from module
    scope and must be set by the caller's context -- confirm.
    """
    # Importances sorted from most to least important, rounded for display.
    feature_imp = pd.Series(model.feature_importances_,
                            index=X_train.columns).sort_values(ascending=False).round(2)

    reset_plt()
    plt.figure(figsize=(25, 15))

    sns_plt = sns.barplot(x=feature_imp, y=feature_imp.index)

    sns_plt.set_xlabel('Feature Importance Score', fontsize=36)
    sns_plt.set_ylabel('Features', fontsize=36)

    sns_plt.tick_params(labelsize=24)
    sns_plt.set_yticklabels(sns_plt.get_yticklabels(), fontsize=24, rotation=30)

    plot_base = all_studies_fig_dir + study_combination + '_' + arousal_signal.lower() + '_feature_importance'
    plt.savefig(plot_base + '.png')
    plt.savefig(plot_base + '.pdf')

    plt.show()
    
    
    
    
# def get_correlation_plot(train_df, arousal_signal, selected_model_features):
#     cor_df = train_df.copy()[['Arousal_Mode'] + selected_model_features]
#     corr = cor_df.corr().round(3)

#     reset_plt()
#     plt.figure(figsize=(40, 40))
#     plt.subplot(1, 1, 1)
    
#     sns_plt = sns.heatmap(corr, cmap='YlGnBu', center=0, square=True, linewidths=.5, annot=True, annot_kws={'size': 24})
#     sns_plt.collections[0].colorbar.ax.tick_params(labelsize=32)

#     sns_plt.set_xticklabels(sns_plt.get_yticklabels(), rotation = 45, fontsize = 38)
#     sns_plt.set_yticklabels(sns_plt.get_yticklabels(), rotation = 0, fontsize = 38) 
    
# #     plt.savefig(figure_path + arousal_signal.lower() + '_correlation_plot.png')
# #     plt.savefig(figure_path + arousal_signal.lower() + '_correlation_plot.pdf')
    
#     plt.show()
   
    


def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
    """ROC AUC computed from predicted labels rather than probabilities.

    A LabelBinarizer is fitted on ``y_test`` and applied to both ``y_test``
    and ``y_pred``; the two encodings are then scored with ``roc_auc_score``
    using the given ``average``.
    """
    binarizer = LabelBinarizer().fit(y_test)
    truth_indicator = binarizer.transform(y_test)
    prediction_indicator = binarizer.transform(y_pred)
    return roc_auc_score(truth_indicator, prediction_indicator, average=average)
    
    
def get_metrics(model, X_test, y_test, y_pred, y_prob=None):
    """Compute the evaluation metrics for one set of predictions.

    Parameters
    ----------
    model, X_test, y_prob
        Currently unused; kept so existing callers keep working. (They were
        only needed by a probability-based AUC / specificity computation that
        is no longer active.)
    y_test, y_pred : array-like
        True and predicted class labels.

    Returns
    -------
    tuple
        ``(Accuracy, AUC, F1, Recall, Precision, Specificity)``. Accuracy, F1,
        Recall and Precision are rounded with get_round(); F1, Recall and
        Precision use weighted averaging. AUC comes from
        multiclass_roc_auc_score() on the predicted labels and is not rounded.
        Specificity is always 0 (not computed).
    """
    Accuracy = get_round(accuracy_score(y_test, y_pred))
    F1 = get_round(f1_score(y_test, y_pred, average='weighted'))
    Recall = get_round(recall_score(y_test, y_pred, average='weighted'))
    Precision = get_round(precision_score(y_test, y_pred, average='weighted'))

    AUC = multiclass_roc_auc_score(y_test, y_pred)
    # Placeholder: specificity is not computed.
    Specificity = 0

    return Accuracy, AUC, F1, Recall, Precision, Specificity
    
    
    
def train_model(df, model_features, arousal_signal, model_train_method, study_subjects_test=None):
    """Fit one model on a train split and record its held-out metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``<arousal_signal>_Arousal_Mode`` and ``Study_Subject``.
        NOTE: an ``Arousal_Mode`` column is written into ``df`` in place.
    model_features : list of str
        Feature columns.
    arousal_signal : str
        Prefix of the target column, e.g. 'PP', 'HR', 'BR', 'PP_HR',
        'HR_BR' or 'PP_HR_BR'.
    model_train_method : str
        Passed on to get_splitted_data().
    study_subjects_test : sequence, optional
        ``Study_Subject`` values held out for evaluation.

    Returns
    -------
    None. The six metrics of this split are appended to the module-level
    ``model_metrics[arousal_signal]['Train']`` lists.
    """
    arousal_col = arousal_signal + '_Arousal_Mode'
    df['Arousal_Mode'] = df[arousal_col]

    X_train, y_train, X_test, y_test, train_df, test_df = get_splitted_data(df,
                                                                            model_features,
                                                                            arousal_signal,
                                                                            'Arousal_Mode',
                                                                            model_train_method,
                                                                            study_subjects_test)
    model = get_model()
    # y_train is a single-column DataFrame; scikit-learn expects a 1-D target.
    model.fit(X_train, y_train.values.ravel())

    y_pred = model.predict(X_test)

    metric_names = ('Accuracy', 'AUC', 'F1', 'Recall', 'Precision', 'Specificity')
    metric_values = get_metrics(model, X_test, y_test, y_pred)

    split_metrics = model_metrics[arousal_signal]['Train']
    for metric_name, metric_value in zip(metric_names, metric_values):
        split_metrics[metric_name].append(metric_value)

    # Optional diagnostics, currently disabled:
    #     get_feature_importance(model, X_train)
    #     get_confusion_matrix(arousal_signal, y_test, y_pred, do_normalize=True)
    #     get_all_confusion_matrices(arousal_signal, train_df, test_df, y_test, y_pred)
    #         (needs: test_df['Prediction'] = y_pred)
    #     plot_roc_curve(model, X_test, y_test)
    #     model.predict_proba(X_test)  # per-class probabilities

In [38]:
def get_trained_model(df, arousal_signal):
    """Cross-validate by subject, then fit and return a model on all of ``df``.

    When the module-level ``model_train_method`` is 'kfold', the unique
    ``Study_Subject`` values are split into ``k_fold_n_splits`` folds and
    train_model() is run once per fold with that fold's subjects held out.
    Afterwards a fresh model is fitted on every row of ``df`` with
    ``<arousal_signal>_Arousal_Mode`` as target, the collected metrics are
    printed via print_metrics(), and the fitted model is returned.

    Other training methods have no implementation here.
    """
    if model_train_method == 'kfold':
        study_subjects = np.array(df.Study_Subject.unique())
        fold_splitter = KFold(n_splits=k_fold_n_splits)

        for fold_number, (_, held_out_idx) in enumerate(fold_splitter.split(study_subjects), start=1):
            print('Running ' + str(k_fold_n_splits) + '-fold iteration: ' + str(fold_number) + '\n')
            held_out_subjects = study_subjects[held_out_idx]
            train_model(df, model_features, arousal_signal, model_train_method, held_out_subjects)

    target_col = arousal_signal + '_Arousal_Mode'
    final_model = get_model()
    final_model.fit(df[model_features], df[target_col])

    print_metrics(model_metrics)

    return final_model


def test_model(model, test_study, test_df, arousal_signal):
    """Evaluate a fitted ``model`` on one study and record the metrics.

    The metrics are stored in the module-level
    ``model_metrics[arousal_signal]['Test'][test_study.upper()]`` and printed
    when the module-level ``print_all`` flag is truthy.

    Returns a copy of ``test_df`` with an added ``Prediction`` column.
    """
    X_test = test_df[model_features]
    y_test = test_df[arousal_signal + '_Arousal_Mode']

    y_pred = model.predict(X_test)

    prediction_df = test_df.copy()
    prediction_df['Prediction'] = y_pred

    Accuracy, AUC, F1, Recall, Precision, Specificity = get_metrics(model, X_test, y_test, y_pred)

    study_scores = model_metrics[arousal_signal]['Test'][test_study.upper()]
    for metric_name, metric_value in (
        ('Accuracy', Accuracy),
        ('AUC', AUC),
        ('F1', F1),
        ('Recall', Recall),
        ('Precision', Precision),
        ('Specificity', Specificity),
    ):
        study_scores[metric_name] = metric_value

    if print_all:
        print('Accuracy: %.2f \nAUC: %.2f \nF1: %.2f \nRecall: %.2f \nPrecision: %.2f \nSpecificity: %.2f\n' % (Accuracy, AUC, F1, Recall, Precision, Specificity))

    return prediction_df
    

In [39]:
def print_metrics(model_metrics, signal=None):
    """Print the summary metrics of one arousal signal.

    Parameters
    ----------
    model_metrics : dict
        Nested as ``model_metrics[signal]['Train'][metric]`` (list of
        per-split values) and ``model_metrics[signal]['K_Fold'][metric]``.
    signal : str, optional
        Arousal signal to report. When omitted, the module-level
        ``arousal_signal`` is used, which is what this function always read
        before the parameter existed.

    What is printed depends on the module-level ``model_train_method``:
    the first value for 'best_accurate', mean ± standard deviation for
    'bootstrap', and the mean for 'kfold'. In the 'kfold' case the mean is
    also stored under ``model_metrics[signal]['K_Fold'][metric]``. If a metric
    cannot be computed (for example an empty list), ``None`` is stored under
    ``'K_Fold'`` instead and nothing is printed for it.
    """
    if signal is None:
        # Legacy behaviour: rely on the notebook-level loop variable.
        signal = arousal_signal

    print('\nAvg. k-fold metrics: ----> ')

    for metric in ['Accuracy', 'AUC', 'F1', 'Recall', 'Precision', 'Specificity']:
        metric_numbers = model_metrics[signal]['Train'][metric]

        try:
            if model_train_method == 'best_accurate':
                print(metric + ': ' + get_rounded_str(metric_numbers[0], 2))
            elif model_train_method == 'bootstrap':
                print(metric + ': ' +
                      get_rounded_str(mean(metric_numbers), 2) + u' \u00B1 ' +
                      get_rounded_str(stdev(metric_numbers), 3))
            elif model_train_method == 'kfold':
                metric_val = get_round(mean(metric_numbers), 2)
                model_metrics[signal]['K_Fold'][metric] = metric_val
                print(metric + ': ' + get_rounded_str(metric_val, 2))
        except (ValueError, TypeError, IndexError):
            # Only "cannot compute" failures are tolerated (empty or too short
            # lists -- statistics.StatisticsError is a ValueError -- or
            # non-numeric entries); programming errors now surface instead of
            # being swallowed.
            model_metrics[signal]['K_Fold'][metric] = None

    print('\n')

In [40]:
def get_metric_df():
    """Return a dict with one ``None`` entry per column of ``metrics_df``."""
    return dict.fromkeys(metrics_df.columns)



def get_test_study_metrics_df(arousal_signal, study_combination, test_study, model_metrics, k_fold=True):
    """Build one results row (as a dict) for a train/test study pairing.

    The ``Train_*`` entries are read from the 'K_Fold' metrics when ``k_fold``
    is true and from the 'Train' metrics otherwise. The ``Test_*`` entries are
    filled only when ``test_study`` is truthy and stay ``None`` otherwise.
    ``Model`` is taken from the module-level ``model_name``.
    """
    row = get_metric_df()
    row['Model'] = model_name
    row['Arousal_Signal'] = arousal_signal
    row['Train_Study'] = study_combination
    row['Test_Study'] = test_study

    if k_fold:
        train_source = 'K_Fold'
    else:
        train_source = 'Train'

    for metric in all_metrics:
        row['Train_' + metric] = model_metrics[arousal_signal][train_source][metric]
        if test_study:
            row['Test_' + metric] = model_metrics[arousal_signal]['Test'][test_study.upper()][metric]

    return row

In [41]:
def get_overleaf_cols_name(study_name):
    """Return the study name as a table heading: upper-cased, underscores as spaces."""
    heading = study_name.upper()
    return heading.replace('_', ' ')

def get_overleaf_modality_name(modality):
    """Return the LaTeX label used for ``modality`` in the Overleaf tables.

    The four single-signal modalities ('PP', 'PP_2', 'HR', 'BR') map to a
    fixed label with a subscript. Any other modality name is returned with
    each underscore escaped as a backslash followed by an underscore, so
    that it is valid LaTeX text.

    Args:
        modality: modality name, e.g. ``'PP'`` or ``'PP_HR_BR'``.

    Returns:
        str: the LaTeX snippet for the modality.
    """
    fixed_labels = {
        'PP': 'PP\\textsubscript{NS}',
        'PP_2': 'PP\\textsubscript{N}',
        'HR': 'HR\\textsubscript{N}',
        'BR': 'BR\\textsubscript{N}',
    }
    if modality in fixed_labels:
        return fixed_labels[modality]

    # The backslash is written explicitly: a bare '\_' is an invalid escape
    # sequence that Python only tolerates with a warning.
    return modality.replace('_', '\\_')
    

    
def get_overleaf_metrics(model_metrics, test_studies, modality, metric):
    """Collect one table row of values for ``metric`` and ``modality``.

    Returns a list whose first element is the ``'K_Fold'`` value, followed by
    the ``'Test'`` value of each study in ``test_studies`` (same order).
    """
    per_modality = model_metrics[modality]
    held_out = [per_modality['Test'][study][metric] for study in test_studies]
    return [per_modality['K_Fold'][metric]] + held_out



def get_overleaf_table(study_combination, model_metrics):
    """Render ``model_metrics`` as the source of a LaTeX (Overleaf) table.

    The table has one leading label column, one column for the k-fold result
    on ``study_combination`` and one column per test study. Each modality
    contributes a multirow label followed by one line per metric.

    Reads the module-level ``overleaf_metrics`` (use every metric in
    ``all_metrics`` when truthy, otherwise only AUC and F1) and
    ``k_fold_n_splits`` (shown in the k-fold column heading).

    Args:
        study_combination: name of the training study combination.
        model_metrics: nested dict indexed as
            ``[modality]['K_Fold'][metric]`` and
            ``[modality]['Test'][study][metric]``. The test studies are taken
            from the first modality's ``'Test'`` entry.

    Returns:
        str: the complete LaTeX table source.
    """
    metric_names = all_metrics if overleaf_metrics else ['AUC', 'F1']

    modalities = list(model_metrics)
    test_studies = list(model_metrics[modalities[0]]['Test'])
    value_column_count = len(test_studies) + 1  # k-fold column + one per test study

    # Every LaTeX backslash is written as '\\': sequences such as '\h', '\e'
    # and '\m' are invalid Python escapes that only work with a warning.
    table_begin_str = '\\begin{center} \n\\begin{tabular}\n'
    table_cols_attr = '{|p{2cm}|' + 'p{3.5cm}|' * value_column_count + '} \\hline \n'

    header_cells = ['& \\textbf{' + get_overleaf_cols_name(study_combination)
                    + ' - ' + str(k_fold_n_splits) + '-Fold} ']
    header_cells += ['& \\textbf{Test on ' + get_overleaf_cols_name(test_study) + '} '
                     for test_study in test_studies]
    table_cols_name = ''.join(header_cells) + '\\\\ \\hline \\hline \n'

    table_end_str = '\\end{tabular} \n\\end{center} \n'

    # Collect the row fragments and join once instead of growing a string.
    row_parts = []
    for modality in modalities:
        row_parts.append('\\multirow{' + str(len(metric_names)) + '}{1em}{\\emph{'
                         + get_overleaf_modality_name(modality) + '}}')

        for metric_name in metric_names:
            metrics = get_overleaf_metrics(model_metrics, test_studies, modality, metric_name)
            cells = ''.join(' & ' + metric_name + ': ' + str(metric_val) for metric_val in metrics)
            row_parts.append(cells + ' \\\\ \n')

        row_parts.append(' \\hline \n\n')

    return table_begin_str + table_cols_attr + table_cols_name + ''.join(row_parts) + table_end_str



# get_overleaf_table('sim1', ['sim2', 'tt1', 'office_task'], model_metrics)

In [42]:
def reset_plt():
    """Reset pyplot's current figure state and re-apply the seaborn context.

    Runs a fixed sequence of pyplot calls (open a figure and clear it, close,
    clear axes, clear figure, open a new figure) and then sets the seaborn
    'paper' context with enlarged font sizes. Takes no arguments and returns
    ``None``; its whole effect is on global matplotlib/seaborn state.
    """
    # Open a figure, wipe it, then close the current figure.
    plt.figure().clear()
    plt.close()
    # Clear the current axes and the current figure.
    # NOTE(review): if no figure is open at this point pyplot presumably
    # creates one implicitly, which would stay open -- confirm.
    plt.cla()
    plt.clf()
    # Leave a fresh figure as the current one.
    plt.figure()
    
    # 'paper' context with larger base, title and axis-label fonts.
    sns.set_context('paper', rc={'font.size': 16,
                                 'axes.titlesize': 24,
                                 'axes.labelsize': 16}) 

In [43]:
def get_correlation_plot():
    """Show a heatmap of the correlations between 'Arousal_Mode' and the model features.

    Reads the module-level ``train_df`` and ``model_features``, computes the
    pairwise correlation matrix (rounded to two decimals) and draws it as an
    annotated seaborn heatmap on a 40x40 figure. The figure is displayed with
    ``plt.show()``; saving to disk is currently commented out. Returns ``None``.

    The commented-out sections are alternative feature selections kept for
    reference; only the "All Features" selection is active.
    """
    #################
    # DO NOT DELETE #
    #################



    ##################################
    ####----   Old Features   ----####
    ##################################
    # cor_df = train_df.copy()[['Arousal_Mode',
    #                           'PP_Mean', 'PP_SD', 'PP_Median', 'PP_SS', 
    #                           'Palm_Mean', 'Palm_SD', 'Palm_Median', 'Palm_SS', 
    #                           'Hr_Mean', 'Hr_SD', 'Hr_Median', 'Hr_SS', 
    #                           'Br_Mean', 'Br_SD', 'Br_Median', 'Br_SS',
    #                           'Treatment_Label_CD', 'Treatment_Label_ED', 
    #                           'Treatment_Label_FD', 'Treatment_Label_MD'
    #                            ]]


    #############################################
    ####---- only physiological Features ----####
    #############################################
    # cor_df = train_df.copy()[['Arousal_Mode', 
    #                           'PP_Mean', 'PP_SD',
    #                           'Palm_Mean', 'Palm_SD', 
    #                           'Hr_Mean', 'Hr_SD',
    #                           'Br_Mean', 'Br_SD',
    #                           'Treatment_Label_CD', 'Treatment_Label_ED', 
    #                           'Treatment_Label_FD', 'Treatment_Label_MD'
    #                            ]]




    # ###################################
    # ####----   Plot Features   ----####
    # ###################################
    # cor_df = plot_df.copy()[['Arousal_Mode'] + plot_features]
    # corr = cor_df.corr().round(2)








    #################################
    ###----   All Features   ----####
    #################################
    # Active selection: the label column plus every model feature.
    cor_df = train_df.copy()[['Arousal_Mode'] + model_features]
    corr = cor_df.corr().round(2)








    ##################################################################
    # Start from a clean pyplot state, then open the large figure that
    # actually holds the heatmap.
    reset_plt()
    plt.figure(figsize=(40, 40))
    plt.subplot(1, 1, 1)

    # Annotated heatmap; the colour scale is centred on zero.
    sns_plt = sns.heatmap(corr, cmap='YlGnBu', center=0, square=True, linewidths=.5, annot=True, annot_kws={'size': 24})
    # Enlarge the tick labels of the colour bar.
    sns_plt.collections[0].colorbar.ax.tick_params(labelsize=32)

    # Slanted x labels, horizontal y labels, both in a large font.
    sns_plt.set_xticklabels(sns_plt.get_xticklabels(), rotation = 45, fontsize = 34, ha='right')  # 45
    sns_plt.set_yticklabels(sns_plt.get_yticklabels(), rotation = 0, fontsize = 34)


    # sns_plt.axes.set_title('Title',fontsize=50)
    # sns_plt.set_xlabel('X Label',fontsize=30)
    # sns_plt.set_ylabel('Y Label',fontsize=20)
    # sns_plt.tick_params(labelsize=5)
    # sns_plt.plt.show()


#     plt.savefig(figure_path + 'all_features_correlation_plot.png')
#     plt.savefig(figure_path + 'all_features_correlation_plot.pdf')

    plt.show()

In [44]:
#ref: https://cmdlinetips.com/2019/05/empirical-cumulative-distribution-function-ecdf-in-python/
def generate_ecdf_plots(df, idx):
    """Add an empirical CDF column for one signal to ``df`` and scatter-plot it.

    ``idx`` selects the signal: 0 -> 'PP', 1 -> 'HR', 2 -> 'BR'. The ECDF of
    ``<signal>_Normalized`` is computed as rank (ties take the highest rank)
    divided by the number of non-null values, and stored in
    ``<signal>_ecdf``. The sorted values are then drawn against the sorted
    ECDF on the current pyplot axes.

    Args:
        df: DataFrame containing the ``<signal>_Normalized`` column. It is
            modified in place (the ``<signal>_ecdf`` column is added).
        idx: 0, 1 or 2 (see above).

    Returns:
        The same ``df`` object, with the ECDF column added.

    Raises:
        ValueError: if ``idx`` is not 0, 1 or 2. Previously this surfaced as
            an UnboundLocalError further down.
    """
    # Raw strings: the mathtext titles contain '\overline', and '\o' is an
    # invalid escape sequence in a normal string literal.
    plot_specs = {
        0: ('PP', r'$ln PP - \overline{ln PP}_{BL} [ln ^{o}c^{2}]$'),
        1: ('HR', r'$HR - \overline{HR}_{BL} [BPM]$'),
        2: ('BR', r'$BR - \overline{BR}_{BL} [BPM]$'),
    }
    if idx not in plot_specs:
        raise ValueError('idx must be 0 (PP), 1 (HR) or 2 (BR), got %r' % (idx,))
    signal, title = plot_specs[idx]

    col = signal + '_Normalized'
    ecdf_col = signal + '_ecdf'

    # ECDF value of each observation: share of observations <= that value.
    df[ecdf_col] = df[col].rank(method='max').div(df[col].count())

    sorted_values = np.sort(df[col])
    probability = np.sort(df[ecdf_col])

    plt.scatter(x=sorted_values, y=probability)
    plt.xlabel('   ', fontsize=4)
    plt.ylabel('ECDF', fontsize=16)
    plt.title(label=title, fontsize=40)

    return df

        
    
    
def generate_arousal_extraction_plots(df, idx):
    """Draw a histogram of one normalized signal, coloured by its arousal label.

    ``idx`` selects what is plotted:

    * 0 -- 'PP_Normalized' coloured by 'PP_Arousal'
    * 1 -- 'PP_Normalized' coloured by 'PP_Arousal_2'
    * 2 -- 'HR_Normalized' coloured by 'HR_Arousal'
    * 3 -- 'BR_Normalized' coloured by 'BR_Arousal'

    Vertical reference lines are added at the mean (red), at +/-1 and +/-2
    standard deviations (black) and at +/-0.5 standard deviations (gray).
    The mean and standard deviation come from the module-level
    ``pp_diff_*``, ``hr_diff_*`` and ``br_diff_*`` values; ``palette`` and
    ``line_width`` are module-level settings as well.

    Args:
        df: DataFrame holding the columns listed above.
        idx: 0, 1, 2 or 3.

    Returns:
        None. The plot is drawn on the current pyplot axes.

    Raises:
        ValueError: if ``idx`` is not one of 0-3. Previously this surfaced as
            an UnboundLocalError when the plot was drawn.
    """
    # Raw strings for the titles: '\o' (from '\overline') is an invalid
    # escape sequence in a normal string literal.
    # The branches stay separate so only the globals of the selected signal
    # are read.
    if idx == 0:
        x_axis, y_axis = 'PP_Normalized', 'PP_Arousal'
        title = r'$ln PP - \overline{ln PP}_{BL} + 0.5 SD [ln ^{o}c^{2}]$'
        diff_mean, diff_sd = pp_diff_mean, pp_diff_sd
    elif idx == 1:
        x_axis, y_axis = 'PP_Normalized', 'PP_Arousal_2'
        title = r'$ln PP - \overline{ln PP}_{BL} [ln ^{o}c^{2}]$'
        diff_mean, diff_sd = pp_diff_mean, pp_diff_sd
    elif idx == 2:
        x_axis, y_axis = 'HR_Normalized', 'HR_Arousal'
        title = r'$HR - \overline{HR}_{BL} [BPM]$'
        diff_mean, diff_sd = hr_diff_mean, hr_diff_sd
    elif idx == 3:
        x_axis, y_axis = 'BR_Normalized', 'BR_Arousal'
        title = r'$BR - \overline{BR}_{BL} [BPM]$'
        diff_mean, diff_sd = br_diff_mean, br_diff_sd
    else:
        raise ValueError('idx must be 0, 1, 2 or 3, got %r' % (idx,))

    sns_plot = sns.histplot(data=df,
                            x=x_axis,
                            hue=y_axis,
                            palette=palette)

    sns_plot.set(title=title,
                 xlabel='   ',
                 ylabel='   ',
                 yticks=[])

    # Mean first, then the standard-deviation guides in the original order.
    plt.axvline(diff_mean, color='red', lw=line_width)
    sd_guides = (('black', (1, 2, -1, -2)),
                 ('gray', (-0.5, 0.5)))
    for color, sd_multiples in sd_guides:
        for multiple in sd_multiples:
            plt.axvline(diff_mean + multiple * diff_sd, color=color, lw=line_width)

    plt.legend().set_visible(False)
    

In [46]:
# Fixed-width rule of dashes.
# NOTE(review): presumably printed as a visual separator in console output --
# its uses are outside this part of the file; confirm.
decorator = '----------------------------------------------------'

In [47]:
# def get_test_subjs(arousal_signal):
    
#     ######################################
#     # 'PP', 'PP_2', 'HR', 'BR', 'PP_HR_BR'
#     ######################################
    
#     if arousal_signal == 'PP':
#         #####################################
#         # return [2, 31, 66, 47, 44, 25, 24]
#         #####################################
#         return [18, 23, 16, 25, 8, 45, 2]
    
#     elif arousal_signal == 'PP_2':
#         return [44, 20, 16, 68, 33, 60, 18]
    
#     elif arousal_signal == 'HR':
#         return [61, 29, 24, 38, 84, 2, 17]
    
#     elif arousal_signal == 'BR':
#         return [44, 62, 81, 20, 61, 38, 79]
    
#     elif arousal_signal == 'PP_HR_BR':
#         return [31, 66, 16, 29, 62, 44, 36]

# Standardized Scaled Features

In [48]:
def get_scaled_df(files):
    """Write a per-study standardized copy of each CSV file.

    For every file name in ``files`` the CSV is read from
    ``all_studies_data_dir``. Within each study (``Study_Name``) every column
    listed in the module-level ``model_features`` is standardized on its own
    with a freshly fitted ``StandardScaler``. The result is written next to
    the input as ``<name without last 4 characters>_scaled.csv``.

    Args:
        files: iterable of CSV file names relative to ``all_studies_data_dir``.
            The output name is built by dropping the last four characters, so
            names are expected to end in '.csv'.

    Returns:
        None. The scaled data is written to disk.
    """
    for file in files:
        df = pd.read_csv(all_studies_data_dir + file)

        # Collect the per-study frames and concatenate once: DataFrame.append
        # copies the accumulated frame on every call and no longer exists in
        # pandas 2.x.
        scaled_parts = []
        for study in df.Study_Name.unique():
            print(file, study)
            study_df = df[df.Study_Name == study].copy()

            # Scale each feature separately, using only this study's rows.
            for feature in model_features:
                study_df[feature] = StandardScaler().fit_transform(study_df[[feature]])

            scaled_parts.append(study_df)

        # A file without any study still produces an (empty) output file.
        scaled_df = pd.concat(scaled_parts) if scaled_parts else pd.DataFrame()
        scaled_df.to_csv(all_studies_data_dir + file[:-4] + '_scaled.csv', sep=',', index=False)
    
    
# 'data_7_w30_10.csv'[:-4]+'_scaled.csv'

# Data Aggregation

In [49]:
# def get_post_processed_aggregrated_df(aggregrated_df):
#     aggregrated_df = pd.get_dummies(aggregrated_df, columns=['Gender'])
#     aggregrated_df = rename_cols(aggregrated_df)
    
# ###########################################################################################    
# #     aggregrated_df['PP_HR_Arousal_Mode'] = np.where((aggregrated_df['PP_Arousal_Mode'] == aggregrated_df['HR_Arousal_Mode']) &
# #                                           (aggregrated_df['PP_Arousal_Mode'] == 'stressed'), 
# #                                            'stressed', 'relaxed')


# #     aggregrated_df['PP_BR_Arousal_Mode'] = np.where((aggregrated_df['PP_Arousal_Mode'] == aggregrated_df['BR_Arousal_Mode']) &
# #                                               (aggregrated_df['PP_Arousal_Mode'] == 'stressed'), 
# #                                                'stressed', 'relaxed')


# #     aggregrated_df['HR_BR_Arousal_Mode'] = np.where((aggregrated_df['HR_Arousal_Mode'] == aggregrated_df['BR_Arousal_Mode']) &
# #                                               (aggregrated_df['HR_Arousal_Mode'] == 'stressed'), 
# #                                                'stressed', 'relaxed')


# #     aggregrated_df['PP_HR_BR_Arousal_Mode'] = aggregrated_df[['PP_Arousal_Mode',
# #                                               'HR_Arousal_Mode',
# #                                               'BR_Arousal_Mode']].mode(axis=1)[0] 
# ###########################################################################################    
    
#     aggregrated_df.dropna(inplace=True)
#     return aggregrated_df
    

In [50]:
# def get_single_window_aggregrated_df(single_sec_df, window_size=10, rolling_window=False):
#     next_idx = window_size//2 if rolling_window else window_size
#     aggregrated_df = pd.DataFrame()
    
    
#     for study in single_sec_df.Study_Name.unique():
#         study_df = single_sec_df.copy()[single_sec_df.Study_Name == study]

#         # for subj in study_df.Subject.unique():
#         for subj in study_df.Subject.unique()[:1]:

#             print(study, subj)
#             study_subj_df = study_df.copy()[study_df.Subject == subj]

#             for treatment in study_subj_df.Treatment.unique():
#                 treatment_df = study_subj_df.copy()[study_subj_df.Treatment == treatment]

#                 i = 0
#                 totalRows = treatment_df.shape[0]

#                 while i+window_size < totalRows:
#                     treatment_agg_df = treatment_df.copy().iloc[i: min(totalRows, i+window_size)]
#                     first_row = treatment_agg_df.iloc[0]
#                     i += next_idx

#                     temp_df = pd.DataFrame({
#                         'Study_Name': [first_row.Study_Name], 
#                         'Subject': [first_row.Subject], 
#                         'Study_Subject': [first_row.Study_Subject], 
#                         'Treatment': [first_row.Treatment],
#                         'Gender': [first_row.Gender],
#                         'Age': [first_row.Age],
#                         'NASA_Effort': [first_row.NASA_Effort],
#                         'NASA_Frustration': [first_row.NASA_Frustration],
#                         'NASA_Mental': [first_row.NASA_Mental],
#                         'NASA_Performance': [first_row.NASA_Performance],
#                         'NASA_Physical': [first_row.NASA_Physical],
#                         'NASA_Temporal': [first_row.NASA_Temporal],
#                         'NASA_Total': [first_row.NASA_Total],
#                     })

#                     temp_df['PP_Arousal_Mode'] = treatment_agg_df.PP_Arousal.mode()
#                     temp_df['PP_2_Arousal_Mode'] = treatment_agg_df.PP_Arousal_2.mode()
#                     temp_df['HR_Arousal_Mode'] = treatment_agg_df.HR_Arousal.mode()
#                     temp_df['BR_Arousal_Mode'] = treatment_agg_df.BR_Arousal.mode()

#                     temp_df['PP_Mean'] = treatment_agg_df.Perinasal_Log.mean()
#                     temp_df['PP_Median'] = treatment_agg_df.Perinasal_Log.median()
#                     temp_df['PP_SD'] = stdev(treatment_agg_df.Perinasal_Log)
#                     temp_df['PP_SS'] = sum_of_squares(treatment_agg_df.Perinasal_Log)

#                     temp_df['HR_Mean'] = treatment_agg_df.Heart.mean()
#                     temp_df['HR_Median'] = treatment_agg_df.Heart.median()
#                     temp_df['HR_SD'] = stdev(treatment_agg_df.Heart)
#                     temp_df['HR_SS'] = sum_of_squares(treatment_agg_df.Heart)

#                     temp_df['BR_Mean'] = treatment_agg_df.Breathing.mean()
#                     temp_df['BR_Median'] = treatment_agg_df.Breathing.median()
#                     temp_df['BR_SD'] = stdev(treatment_agg_df.Breathing)
#                     temp_df['BR_SS'] = sum_of_squares(treatment_agg_df.Breathing)
                    
#                     pd.get_dummies(final_df, columns=['Gender'])

#                     aggregrated_df = aggregrated_df.append(temp_df)

#     return get_post_processed_aggregrated_df(aggregrated_df)

In [51]:
def get_hist_arousal_voting(df):
    """Return ``df`` unchanged.

    Every voting strategy below is commented out, so this function currently
    performs no computation: it is a pass-through kept as the place where a
    combined-modality voting rule can be switched back on. The commented
    blocks are the alternatives that were tried:

    * any single modality 'stressed' -> 'stressed';
    * both/majority of the histogram-based labels 'stressed';
    * agreement of the ECDF-based labels, otherwise 'neutral'.

    Args:
        df: DataFrame with the per-modality arousal columns.

    Returns:
        The same ``df`` object, unmodified.
    """
    
    #####################################################################################################################
    #                                   Any Modality Stress
    #####################################################################################################################
    # df['PP_HR_Arousal_Mode'] = np.where((df['PP_Arousal_Mode'] == 'stressed') |
    #                                           (df['HR_Arousal_Mode'] == 'stressed'), 
    #                                            'stressed', 'relaxed')

    # df['PP_BR_Arousal_Mode'] = np.where((df['PP_Arousal_Mode'] == 'stressed') |
    #                                           (df['BR_Arousal_Mode'] == 'stressed'), 
    #                                            'stressed', 'relaxed')

    # df['HR_BR_Arousal_Mode'] = np.where((df['HR_Arousal_Mode'] == 'stressed') |
    #                                           (df['BR_Arousal_Mode'] == 'stressed'), 
    #                                            'stressed', 'relaxed')

    # df['PP_HR_BR_Arousal_Mode'] = np.where((df['PP_Arousal_Mode'] == 'stressed') |
    #                                                   (df['HR_Arousal_Mode'] == 'stressed') |
    #                                                   (df['BR_Arousal_Mode'] == 'stressed'),
    #                                                   'stressed', 'relaxed')
    #####################################################################################################################



#     #####################################################################################################################
#     #                                   Two Modalities Stressed (Majority Votes for Stress)
#     #####################################################################################################################
#     df['PP_HR_Arousal_Mode_Hist'] = np.where((df['PP_Arousal_Mode_Hist'] == df['HR_Arousal_Mode_Hist']) &
#                                               (df['PP_Arousal_Mode_Hist'] == 'stressed'), 
#                                                'stressed', 'relaxed')

#     df['PP_BR_Arousal_Mode_Hist'] = np.where((df['PP_Arousal_Mode_Hist'] == df['BR_Arousal_Mode_Hist']) &
#                                               (df['PP_Arousal_Mode_Hist'] == 'stressed'), 
#                                                'stressed', 'relaxed')

#     df['HR_BR_Arousal_Mode_Hist'] = np.where((df['HR_Arousal_Mode_Hist'] == df['BR_Arousal_Mode_Hist']) &
#                                               (df['HR_Arousal_Mode_Hist'] == 'stressed'), 
#                                                'stressed', 'relaxed')

#     df['PP_HR_BR_Arousal_Mode_Hist'] = df[['PP_Arousal_Mode_Hist',
#                                            'HR_Arousal_Mode_Hist',
#                                            'BR_Arousal_Mode_Hist']].mode(axis=1)[0] 
#     #####################################################################################################################



    #####################################################################################################################
    #                        ECDF - Two Modalities Stressed (Majority Votes for Stress)
    #####################################################################################################################
    # df['PP_HR_Arousal_Mode'] = np.where((df['PP_Arousal_Mode'] == df['HR_Arousal_Mode']), 
    #                                            df['PP_Arousal_Mode'], 'neutral')

    # df['PP_BR_Arousal_Mode'] = np.where((df['PP_Arousal_Mode'] == df['BR_Arousal_Mode']), 
    #                                            df['PP_Arousal_Mode'], 'neutral')

    # df['HR_BR_Arousal_Mode'] = np.where((df['HR_Arousal_Mode'] == df['BR_Arousal_Mode']), 
    #                                            df['BR_Arousal_Mode'], 'neutral')

    # df['PP_HR_BR_Arousal_Mode'] = df[['PP_Arousal_Mode',
    #                                               'HR_Arousal_Mode',
    #                                               'BR_Arousal_Mode']].mode(axis=1)[0] 
    #####################################################################################################################


    # No strategy is active: hand the frame back as it came in.
    return df






def get_arousal_cols(df):
    """Give the ECDF-based arousal columns their final names and report them.

    The frame is first passed through ``get_hist_arousal_voting``. Each
    ``<signal>_Arousal_Mode_ecdf`` column is then renamed in place to
    ``<signal>_Arousal_Mode`` and ``print_percentage`` is called for every
    renamed column, single signals first and combinations after.

    Args:
        df: aggregated DataFrame holding the ``*_Arousal_Mode_ecdf`` columns.

    Returns:
        The DataFrame with the renamed columns.
    """
    df = get_hist_arousal_voting(df)

    # Single modalities first, then their combinations; this is also the
    # order in which the label percentages are printed.
    signal_names = ('PP', 'HR', 'BR', 'PP_HR', 'PP_BR', 'HR_BR', 'PP_HR_BR')

    final_names = {}
    for signal_name in signal_names:
        final_names[signal_name + '_Arousal_Mode_ecdf'] = signal_name + '_Arousal_Mode'
    df.rename(columns=final_names, inplace=True)

    for signal_name in signal_names:
        print_percentage(df, signal_name + '_Arousal_Mode')

    return df
    

In [None]:
def _label_by_ecdf_thresholds(df, score_col, label_col, low, high):
    """Write 'relaxed' / 'neutral' / 'stressed' into ``label_col`` from ``score_col``."""
    scores = df[score_col]
    df.loc[scores < low, label_col] = 'relaxed'  # 'non-arousal'
    df.loc[(scores >= low) & (scores < high), label_col] = 'neutral'
    df.loc[scores >= high, label_col] = 'stressed'  # 'arousal'


def generate_ecdf_arousal_cols(df, signals):
    """Derive ECDF-based arousal labels for single and combined signals.

    For every combination of two and of three entries of ``signals`` a
    combined ECDF column ``<A>_<B>[_<C>]_ecdf`` is added as the row-wise mean
    of the members' ``<signal>_ecdf`` columns. Then, for each single and
    combined signal, ``<signal>_Arousal_ecdf`` is set to 'relaxed' below
    ``non_arousal_threshold_ecdf``, 'stressed' at or above
    ``arousal_threshold_ecdf`` and 'neutral' in between. When the
    module-level ``running_study`` is 'sim1', a second labelling with fixed
    cut-offs 0.15 / 0.85 is written to ``<signal>_Arousal_ecdf_1sd``.
    ``print_percentage`` is called for each ``<signal>_Arousal_ecdf`` column.

    Args:
        df: DataFrame with one ``<signal>_ecdf`` column per entry of
            ``signals``. Modified in place.
        signals: base signal names, e.g. ``['PP', 'HR', 'BR']``. The sequence
            is NOT modified. Earlier versions extended the caller's list with
            the combined names, which made a second call with the same list
            build combinations of combinations.

    Returns:
        The same ``df`` object with the new columns.
    """
    combined_signals = []
    for subset_size in range(2, 4):
        for signal_subset in itertools.combinations(signals, subset_size):
            combined_signal = '_'.join(signal_subset)
            combined_signals.append(combined_signal)
            member_cols = [member + '_ecdf' for member in signal_subset]
            df[combined_signal + '_ecdf'] = df[member_cols].mean(axis=1)

    # New list on purpose: the caller's ``signals`` must stay untouched.
    all_signals = list(signals) + combined_signals

    # Fixed cut-offs for the extra 'sim1' labelling (loop-invariant).
    one_sd_non_arousal = 0.15
    one_sd_arousal = 1 - one_sd_non_arousal

    for signal in all_signals:
        _label_by_ecdf_thresholds(df,
                                  signal + '_ecdf',
                                  signal + '_Arousal_ecdf',
                                  non_arousal_threshold_ecdf,
                                  arousal_threshold_ecdf)

        if running_study == 'sim1':
            _label_by_ecdf_thresholds(df,
                                      signal + '_ecdf',
                                      signal + '_Arousal_ecdf_1sd',
                                      one_sd_non_arousal,
                                      one_sd_arousal)

        print_percentage(df, signal + '_Arousal_ecdf')

    return df

In [None]:
def _aggregate_window(window_df, subj, treatment):
    """Collapse one window of per-row data into a single-row DataFrame."""
    first_row = window_df.iloc[0]

    row_df = pd.DataFrame({'Subject': [subj],
                           'Treatment': [treatment],
                           'Gender': [first_row.Gender],
                           'Age': [first_row.Age],
                           'Effort': [first_row.Effort],
                           'Frustration': [first_row.Frustration],
                           'Mental_Demand': [first_row['Mental Demand']],
                           'Performance': [first_row['Performance']],
                           'Physical_Demand': [first_row['Physical Demand']],
                           'Temporal_Demand': [first_row['Temporal Demand']],
                           'NASA_Total_Sum': [first_row['NASA Total Sum']]})

    # Series.mode() returns a Series; assigning it to the one-row frame
    # aligns on the index, so the entry at index 0 is the value kept.
    for signal in ('PP', 'HR', 'BR'):
        row_df[signal + '_Arousal_Mode_ecdf'] = window_df[signal + '_Arousal_ecdf'].mode()

    if running_study == 'sim1':
        for signal in ('PP', 'HR', 'BR'):
            row_df[signal + '_Arousal_Mode_1sd'] = window_df[signal + '_Arousal_ecdf_1sd'].mode()

    for signal in ('PP_HR', 'PP_BR', 'HR_BR', 'PP_HR_BR'):
        row_df[signal + '_Arousal_Mode_ecdf'] = window_df[signal + '_Arousal_ecdf'].mode()

    # Summary statistics of the normalized signals over the window.
    feature_sources = (('PP', 'PP_Normalized'),
                       ('Hr', 'HR_Normalized'),
                       ('Br', 'BR_Normalized'))
    for prefix, source_col in feature_sources:
        values = window_df[source_col]
        row_df[prefix + '_Mean'] = values.mean()
        row_df[prefix + '_Median'] = values.median()
        row_df[prefix + '_SD'] = stdev(values)
        row_df[prefix + '_SS'] = sum_of_squares(values)

    return row_df


def get_non_overlapping_aggregrated_df(df, window_size=10, min_window_rows=3, max_subjects=8):
    """Aggregate per-row data into non-overlapping windows per subject and treatment.

    The rows of every (subject, treatment) pair are cut into consecutive
    windows of ``window_size`` rows. Each window becomes one output row with
    the subject/treatment identifiers, the questionnaire values of the
    window's first row, the mode of each ECDF arousal label and the
    mean/median/SD/sum-of-squares of the normalized PP, HR and BR signals.
    A trailing window shorter than ``window_size`` is kept as long as it has
    at least ``min_window_rows`` rows. Finally 'Gender' is one-hot encoded
    and the arousal columns are renamed via ``get_arousal_cols``.

    Args:
        df: input DataFrame. It is not modified; the ``NASA_*`` columns are
            renamed on a copy (needed for combined studies).
        window_size: number of rows per window (default 10, as before).
        min_window_rows: minimum rows a window needs to be aggregated
            (default 3, as before). Must be at least 2 because the standard
            deviation needs two data points.
        max_subjects: only the first ``max_subjects`` subjects are processed.
            The default of 8 reproduces the previously hard-coded limit;
            pass ``None`` to process every subject.

    Returns:
        DataFrame with one row per window. Every row keeps index 0, as the
        rows are concatenated without re-indexing.
    """
    # ------------------------------------------
    # THIS IS NEEDED FOR COMBINED STUDIES
    # ------------------------------------------
    df = df.rename(columns={
        'NASA_Effort': 'Effort',
        'NASA_Frustration': 'Frustration',
        'NASA_Performance': 'Performance',

        'NASA_Mental': 'Mental Demand',
        'NASA_Physical': 'Physical Demand',
        'NASA_Temporal': 'Temporal Demand',
        'NASA_Total': 'NASA Total Sum',
    })

    # Window rows are collected in a list and concatenated once:
    # DataFrame.append re-copies the growing frame on every call and no
    # longer exists in pandas 2.x.
    window_frames = []

    print('Subject Processing: ')
    for subj in df.Subject.unique()[:max_subjects]:
        print(subj)

        for treatment in df.Treatment.unique():
            subj_treatment_df = df[(df.Subject == subj) & (df.Treatment == treatment)]
            total_rows = subj_treatment_df.shape[0]

            # Window starts: 0, window_size, 2*window_size, ... while at
            # least min_window_rows rows remain.
            for start in range(0, total_rows - min_window_rows + 1, window_size):
                window_df = subj_treatment_df.iloc[start: start + window_size]
                window_frames.append(_aggregate_window(window_df, subj, treatment))

    aggregrated_df = pd.concat(window_frames) if window_frames else pd.DataFrame()

    aggregrated_df = pd.get_dummies(aggregrated_df, columns=['Gender'])
    aggregrated_df = get_arousal_cols(aggregrated_df)

    return aggregrated_df

In [54]:
def get_multiple_window_aggregrated_df(single_sec_df, 
                                       train_window_size, 
                                       test_window_size, 
                                       rolling_window):
    """Aggregate consecutive rows into windowed feature rows with look-ahead labels.

    For every (study, subject, treatment) group whose study is listed in the
    module-level ``all_study_combinations``, a slice of ``train_window_size``
    consecutive rows is summarised into one output row, and the
    ``test_window_size`` rows that immediately follow supply the label
    columns (the mode of each ``*_Arousal_ecdf`` column).

    Args:
        single_sec_df: DataFrame holding the identifier / NASA columns copied
            below, the ``PP_Normalized`` / ``HR_Normalized`` /
            ``BR_Normalized`` signals and the ``*_Arousal_ecdf`` columns.
        train_window_size: number of rows summarised into the features.
        test_window_size: number of following rows used for the labels.
        rolling_window: if true the window start advances by
            ``test_window_size`` rows, otherwise by
            ``train_window_size + test_window_size`` rows.

    Returns:
        DataFrame with one row per window, after one-hot encoding ``Gender``
        and passing the frame through ``rename_cols`` and
        ``get_arousal_cols``.

    Raises:
        ValueError: if the resulting step between windows is not positive
            (the window loop would otherwise never terminate).
    """
    total_window_size = train_window_size + test_window_size
    next_idx = test_window_size if rolling_window else total_window_size

    if next_idx <= 0:
        raise ValueError(
            'Window step must be positive, got ' + str(next_idx)
            + ' (train_window_size=' + str(train_window_size)
            + ', test_window_size=' + str(test_window_size)
            + ', rolling_window=' + str(rolling_window) + ')'
        )

    # Output-column prefix -> prefix of the '<prefix>_Normalized' source column.
    norm_col_dict = {
        'Perinasal': 'PP',
        'Heart': 'HR',
        'Breathing': 'BR',
    }
    # Prefixes of the '<prefix>_Arousal_ecdf' columns that are turned into labels.
    label_prefixes = ['PP', 'HR', 'BR', 'PP_HR', 'PP_BR', 'HR_BR', 'PP_HR_BR']

    # Per-window frames are collected here and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every call.
    window_frames = []
    print(single_sec_df.shape)

    for study in single_sec_df.Study_Name.unique():
        if study not in all_study_combinations:
            continue

        study_df = single_sec_df[single_sec_df.Study_Name == study]

        for subj in study_df.Subject.unique():
            print(study, subj)
            study_subj_df = study_df[study_df.Subject == subj]

            for treatment in study_subj_df.Treatment.unique():
                treatment_df = study_subj_df[study_subj_df.Treatment == treatment]
                totalRows = treatment_df.shape[0]
                i = 0

                # Only complete (train + test) windows are emitted.
                while i + total_window_size <= totalRows:
                    treatment_agg_df = treatment_df.iloc[i: i + train_window_size]
                    label_agg_df = treatment_df.iloc[i + train_window_size: i + total_window_size]

                    # Identifier / questionnaire values are taken from the
                    # first row of the feature window.
                    first_row = treatment_agg_df.iloc[0]
                    i += next_idx

                    temp_df = pd.DataFrame({
                        'Study_Name': [first_row.Study_Name], 
                        'Subject': [first_row.Subject], 
                        'Study_Subject': [first_row.Study_Subject], 
                        'Treatment': [first_row.Treatment],
                        'Gender': [first_row.Gender],
                        'Age': [first_row.Age],
                        'NASA_Effort': [first_row.NASA_Effort],
                        'NASA_Frustration': [first_row.NASA_Frustration],
                        'NASA_Mental': [first_row.NASA_Mental],
                        'NASA_Performance': [first_row.NASA_Performance],
                        'NASA_Physical': [first_row.NASA_Physical],
                        'NASA_Temporal': [first_row.NASA_Temporal],
                        'NASA_Total': [first_row.NASA_Total],
                    })

                    for col, norm_prefix in norm_col_dict.items():
                        signal = treatment_agg_df[norm_prefix + '_Normalized']

                        temp_df[col + '_Mean'] = signal.mean()
                        temp_df[col + '_Median'] = signal.median()
                        temp_df[col + '_SD'] = stdev(signal)
                        temp_df[col + '_SS'] = sum_of_squares(signal)

                        # Whole window serialised as ', '-joined text.
                        temp_df[col + '_Window'] = ', '.join(str(v) for v in signal.values)

                    # mode() returns a Series; assignment aligns on index 0,
                    # so the first mode is kept (NaN if there is none).
                    for prefix in label_prefixes:
                        temp_df[prefix + '_Arousal_Mode_ecdf'] = label_agg_df[prefix + '_Arousal_ecdf'].mode()

                    window_frames.append(temp_df)

    aggregrated_df = pd.concat(window_frames) if window_frames else pd.DataFrame()

    aggregrated_df = pd.get_dummies(aggregrated_df, columns=['Gender'])
    aggregrated_df = rename_cols(aggregrated_df)
    aggregrated_df = get_arousal_cols(aggregrated_df)

    print(aggregrated_df.shape)
    return aggregrated_df

In [55]:

                    # temp_df = pd.DataFrame({
                    #     'Study_Name': [first_row.Study_Name], 
                    #     'Subject': [first_row.Subject], 
                    #     'Study_Subject': [first_row.Study_Subject], 
                    #     'Treatment': [first_row.Treatment],
                    #     'Gender': [first_row.Gender],
                    #     'Age': [first_row.Age],
                    #     'NASA_Effort': [first_row.NASA_Effort],
                    #     'NASA_Frustration': [first_row.NASA_Frustration],
                    #     'NASA_Mental': [first_row.NASA_Mental],
                    #     'NASA_Performance': [first_row.NASA_Performance],
                    #     'NASA_Physical': [first_row.NASA_Physical],
                    #     'NASA_Temporal': [first_row.NASA_Temporal],
                    #     'NASA_Total': [first_row.NASA_Total],
                    # })
        
                        
        
        
        
#                     temp_df['PP_Mean'] = treatment_agg_df.Perinasal_Log.mean()
#                     temp_df['PP_Median'] = treatment_agg_df.Perinasal_Log.median()
#                     temp_df['PP_SD'] = stdev(treatment_agg_df.Perinasal_Log)
#                     temp_df['PP_SS'] = sum_of_squares(treatment_agg_df.Perinasal_Log)

#                     temp_df['HR_Mean'] = treatment_agg_df.Heart.mean()
#                     temp_df['HR_Median'] = treatment_agg_df.Heart.median()
#                     temp_df['HR_SD'] = stdev(treatment_agg_df.Heart)
#                     temp_df['HR_SS'] = sum_of_squares(treatment_agg_df.Heart)

#                     temp_df['BR_Mean'] = treatment_agg_df.Breathing.mean()
#                     temp_df['BR_Median'] = treatment_agg_df.Breathing.median()
#                     temp_df['BR_SD'] = stdev(treatment_agg_df.Breathing)
#                     temp_df['BR_SS'] = sum_of_squares(treatment_agg_df.Breathing)
    
                    
                    
#                     for col in ['Perinasal_Log', 'Heart', 'Breathing']:
#                         # temp_df[col+'_Window'] = [treatment_agg_df[col].values]
#                         temp_df[col+'_Window'] = ', '.join(str(v) for v in treatment_agg_df[col].values)  
                    

In [56]:
def generate_file_name_with_window_info(file_name, 
                                        train_window_size, 
                                        test_window_size, 
                                        rolling):
    
    final_file_name = file_name[:-4]
    if rolling: final_file_name+='_rolling'
    
    return final_file_name+'_w'+str(train_window_size)+'_'+str(test_window_size)+'.csv'
    
    
    


# Deep Neural Network

In [None]:
# Static (one value per window) inputs of the network: a copy of
# `model_features`, so in-place edits here leave `model_features` untouched.
model_features_static = model_features.copy()
# Disabled: optional extension with last-10-second summary features.
# model_features_static += ['PP_Mean_10s',
#                           'PP_SD_10s',
#                           'Heart_Mean_10s',
#                           'Heart_SD_10s',
#                           'Breathing_Mean_10s',
#                           'Breathing_SD_10s']

# Time-series inputs: columns that process_ts_features() parses from
# ', '-separated text into float arrays.
model_features_ts = ['Perinasal_Window', 'Heart_Window', 'Breathing_Window']

# Every column the DNN pipeline selects from a frame (static first, then time series).
dnn_features = model_features_static + model_features_ts

In [None]:
def process_ts_features(df):
    """Parse the serialised window columns of ``df`` into float64 arrays.

    Each column named in the module-level ``model_features_ts`` is expected to
    hold ', '-separated numbers as text; every cell is replaced in place by a
    NumPy float64 array. The same (mutated) frame is returned.
    """
    def _parse_window(window_text):
        return np.array(window_text.split(', ')).astype('float64')

    for ts_column in model_features_ts:
        df[ts_column] = df[ts_column].apply(_parse_window)

    return df


def create_categorical_labels(df):
    """Add a numeric ``<signal>_Arousal_Mode_Cat`` column per arousal signal.

    For every entry of the module-level ``arousal_signals`` the text labels in
    ``<signal>_Arousal_Mode`` are mapped relaxed -> 0, stressed -> 1,
    neutral -> -1 and stored in a new ``..._Cat`` column. ``df`` is modified
    in place and returned.
    """
    text_labels = ['relaxed','stressed', 'neutral']
    numeric_codes = [0, 1, -1]

    for signal_name in arousal_signals:
        source_column = signal_name + '_Arousal_Mode'
        df[source_column + '_Cat'] = df[source_column].replace(text_labels, numeric_codes)

    return df


def split_static_and_ts_features_as_matrix(df):
    """Return ``(static_matrix, ts_array)`` extracted from ``df``.

    ``static_matrix`` is ``.values`` of the ``model_features_static`` columns;
    ``ts_array`` is a NumPy array rebuilt from the nested lists of the
    ``model_features_ts`` columns.
    """
    static_matrix = df[model_features_static].values
    ts_nested = df[model_features_ts].values.tolist()
    return static_matrix, np.array(ts_nested)

def model_input_data(X):
    """Convert a feature frame into the two-element input list of the model.

    Returns ``[time_series, static]`` (in that order), both cast to float32.
    """
    static_matrix, ts_array = split_static_and_ts_features_as_matrix(X)
    return [np.asarray(part).astype('float32') for part in (ts_array, static_matrix)]




In [2]:
def recall_m(y_true, y_pred):
    """Recall computed with Keras backend ops.

    Counts rounded, [0, 1]-clipped ``y_true * y_pred`` as true positives and
    divides by the rounded, clipped ``y_true`` count; ``K.epsilon()`` guards
    against division by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positive_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hit_count / (actual_positive_count + K.epsilon())


def precision_m(y_true, y_pred):
    """Precision computed with Keras backend ops.

    Counts rounded, [0, 1]-clipped ``y_true * y_pred`` as true positives and
    divides by the rounded, clipped ``y_pred`` count; ``K.epsilon()`` guards
    against division by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positive_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hit_count / (predicted_positive_count + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator avoids division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio


# def auc_m(y_true, y_pred):
#     return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)



In [None]:
def get_dnn_model():
    """Build the network selected by the module-level ``model_name``.

    Returns:
        The model produced by ``get_lstm_model()`` when ``model_name`` is
        ``'LSTM'``.

    Raises:
        ValueError: for any other ``model_name``. The function used to fall
            through and return ``None``, which only surfaced later as an
            ``AttributeError`` when the caller invoked ``model.fit``.
    """
    if model_name == 'LSTM':
        return get_lstm_model()
    # elif model_name == 'DNN':
    #     return get_dnn_v1_model()

    raise ValueError('Unsupported model_name: ' + repr(model_name))

In [None]:
# focal loss
def focal_loss_custom(alpha, gamma):
    """Return a loss function wrapping ``tf_a.losses.SigmoidFocalCrossEntropy``.

    Args:
        alpha: forwarded as ``alpha`` to ``SigmoidFocalCrossEntropy``.
        gamma: forwarded as ``gamma`` to ``SigmoidFocalCrossEntropy``.

    Returns:
        A function ``binary_focal_loss(y_true, y_pred)`` suitable for
        ``model.compile(loss=...)``.
    """
    # Built once per factory call instead of on every loss evaluation; the
    # unused tf.ones_like(y_true) tensor of the previous version is gone.
    fl = tf_a.losses.SigmoidFocalCrossEntropy(alpha=alpha, gamma=gamma)

    def binary_focal_loss(y_true, y_pred):
        return fl(y_true, y_pred)

    return binary_focal_loss

def get_compiled_model(model):
    """Compile ``model`` with the focal loss, the 'adam' optimizer and the tracked metrics.

    The metric order (accuracy, AUC, F1, precision, recall) matters: callers
    unpack the result of ``model.evaluate`` positionally as
    ``loss, accuracy, auc, f1_score, precision, recall``.

    Returns:
        The same ``model`` object, compiled.
    """
    # Alternative kept for reference: loss='binary_crossentropy'.
    loss_fn = focal_loss_custom(alpha=0.2, gamma=2.0)

    tracked_metrics = [
        'accuracy',
        keras.metrics.AUC(),
        f1_m,
        precision_m,
        recall_m,
    ]

    model.compile(loss=loss_fn, optimizer='adam', metrics=tracked_metrics)
    return model

In [1]:
def get_lstm_model():
    """Build and compile the two-input Conv1D + bidirectional-LSTM + dense model.

    Inputs (in this order):
        * ``TIMESERIES_INPUT`` of shape ``(3, n_features)``
        * ``STATIC_INPUT`` of shape ``(len(model_features_static),)``

    The time-series branch runs through two Conv1D/MaxPooling1D stages and two
    bidirectional LSTM layers; the static branch through one dense layer. Both
    are concatenated and fed to two dense layers and a single sigmoid output.

    Returns:
        The model after ``get_compiled_model``.
    """
    # NOTE(review): assumes dnn_window_size is a string whose first two
    # characters are the numeric window length - confirm against its definition.
    n_timesteps = 3
    n_features = int(dnn_window_size[:2])
    n_static_features = len(model_features_static)
    n_outputs = 1

    # ---- inputs ----
    ts_in = Input(shape=(n_timesteps, n_features), name='TIMESERIES_INPUT')
    static_in = Input(shape=(n_static_features, ), name='STATIC_INPUT')

    # ---- convolutional front-end on the time-series branch ----
    seq = Conv1D(
        filters=32,
        kernel_size=8,
        strides=1,
        activation='relu',
        padding='same',
    )(ts_in)
    seq = MaxPooling1D(pool_size=2, name='POOLING_LAYER_1')(seq)

    seq = Conv1D(
        filters=16,
        kernel_size=4,
        strides=1,
        activation='relu',
        padding='same',
    )(seq)
    seq = MaxPooling1D(pool_size=1, name='POOLING_LAYER_2')(seq)

    # ---- recurrent layers ----
    seq = Bidirectional(
        LSTM(
            128,
            kernel_regularizer=l2(0.01),
            recurrent_regularizer=l2(0.01),
            return_sequences=True,
        ),
        name='BIDIRECTIONAL_LAYER_1',
    )(seq)
    seq = Dropout(0.1, name='DROPOUT_LAYER_1')(seq)

    seq = Bidirectional(
        LSTM(
            64,
            kernel_regularizer=l2(0.01),
            recurrent_regularizer=l2(0.01),
        ),
        name='BIDIRECTIONAL_LAYER_2',
    )(seq)
    seq = Dropout(0.1, name='DROPOUT_LAYER_2')(seq)

    # ---- static branch ----
    static_branch = Dense(
        64,
        kernel_regularizer=l2(0.001),
        activation='relu',
        name='DENSE_LAYER_1',
    )(static_in)

    # ---- merge and classification head ----
    merged = Concatenate(axis=1, name='CONCATENATED_TIMESERIES_STATIC')([seq, static_branch])
    head = Dense(64, activation='relu', name='DENSE_LAYER_2')(merged)
    head = Dense(32, activation='relu', name='DENSE_LAYER_3')(head)
    prediction = Dense(n_outputs, activation='sigmoid', name='OUTPUT_LAYER')(head)

    network = Model(inputs=[ts_in, static_in], outputs=[prediction])
    return get_compiled_model(network)



In [None]:
def get_dnn_model_and_metrics(df, arousal_signal, k_fold=False):
    """Train (or load) the network for one arousal signal and score it on a hold-out split.

    ``df`` gets a new ``Arousal_Mode`` column (copied from
    ``<arousal_signal>_Arousal_Mode_Cat``) and is then split 80/20 with a
    fixed random state into training and validation data.

    Args:
        df: frame containing ``dnn_features`` and the ``..._Arousal_Mode_Cat``
            label column. It is modified in place (``Arousal_Mode`` is added).
        arousal_signal: prefix selecting the label column.
        k_fold: when true the model is trained for a single epoch and the
            on-disk model cache is bypassed (see note below).

    Returns:
        ``(model, metrics)`` where ``metrics`` holds the values reported by
        ``model.evaluate`` (lower-case keys) and by ``get_metrics``
        (capitalised keys).

    Note:
        The cache file name does not depend on the fold, so k-fold runs used
        to save their 1-epoch model under the final model's name and every
        later fold (and the final model) simply re-loaded it. Fold runs now
        neither read nor write the cache.
    """
    df['Arousal_Mode'] = df[arousal_signal + '_Arousal_Mode_Cat']

    X_all = df.copy()[dnn_features]
    y_all = df.copy()['Arousal_Mode']

    X_train, X_valid, y_train, y_valid = train_test_split(X_all, y_all, test_size=0.2, random_state=42)
    valid_inputs = model_input_data(X_valid)

    dnn_model_name = add_path(add_path(all_studies_data_dir, models_dir), dnn_window_size)+'lstm_'+arousal_signal+'_'+study_combination
    use_saved_model = not k_fold

    if use_saved_model and os.path.exists(dnn_model_name+'.json'):
        # -------------------------------------------
        # Load from the saved model and weights
        # -------------------------------------------
        print('Loading from saved model and weights.....')
        with open(dnn_model_name+'.json', 'r') as json_file:
            loaded_model_json = json_file.read()

        model = model_from_json(loaded_model_json)
        model.load_weights(dnn_model_name+'.h5')
        model = get_compiled_model(model)

    else:
        cur_epochs = 1 if k_fold else epochs
        model = get_dnn_model()

        model.fit(model_input_data(X_train),
                  y_train,
                  epochs=cur_epochs,
                  batch_size=batch_size,
                  verbose=verbose,
                  validation_data=(valid_inputs, y_valid))

        if use_saved_model:
            # -------------------------------------------
            # Save the architecture (JSON) and the weights (HDF5)
            # -------------------------------------------
            with open(dnn_model_name+'.json', 'w') as json_file:
                json_file.write(model.to_json())
            model.save_weights(dnn_model_name+'.h5')

    # Metrics as computed by Keras; the order follows get_compiled_model.
    # The local is named keras_f1 so it does not shadow sklearn's f1_score.
    loss, accuracy, auc, keras_f1, precision, recall = model.evaluate(valid_inputs, y_valid, verbose=0)

    # Metrics recomputed from thresholded predictions.
    y_prob = model.predict(valid_inputs)
    y_pred = np.where(y_prob > prediction_threshold, 1, 0).flatten().tolist()

    Accuracy, AUC, F1, Recall, Precision, Specificity = get_metrics(model, X_valid, y_valid, y_pred, y_prob.flatten().tolist())

    print('\n------------------------ Train Study ------------------------') 
    print('Train Data - True --> ')
    print(collections.Counter(df[arousal_signal + '_Arousal_Mode_Cat']))

    print('\nValidation Data - True --> ')
    print(collections.Counter(y_valid))

    print('\nValidation Data - Prediction --> ')
    print(collections.Counter(y_pred))

    return model, {'loss': loss,

                   'accuracy': accuracy,
                   'auc': auc,
                   'f1_score': keras_f1, 
                   'precision': precision, 
                   'recall': recall,

                   'Accuracy': Accuracy, 
                   'AUC': AUC,
                   'F1': F1, 
                   'Precision': Precision, 
                   'Recall': Recall,
                   'Specificity': Specificity,
                  }

In [None]:
def train_dnn_model(df, model_features, arousal_signal, model_train_method, study_subjects=None):
    """Run one k-fold iteration and record its metrics in ``model_metrics``.

    Args:
        df: full feature frame; must contain a ``Study_Subject`` column.
        model_features: unused here, kept for interface compatibility.
        arousal_signal: key into ``model_metrics`` and label-column prefix.
        model_train_method: unused here, kept for interface compatibility.
        study_subjects: collection of ``Study_Subject`` values to keep. With
            the default ``None`` every row is used (previously ``None`` was
            passed straight to ``isin`` and raised a ``TypeError``).

    Side effects:
        Updates ``model_metrics[arousal_signal]['K_Fold']``.
    """
    if study_subjects is None:
        train_df = df.copy()
    else:
        train_df = df.copy()[df.Study_Subject.isin(study_subjects)]

    #####################################################################################
    #                               MODELING
    #####################################################################################
    _, metrics = get_dnn_model_and_metrics(train_df, arousal_signal, k_fold=True)

    #####################################################################################
    fold_metrics = model_metrics[arousal_signal]['K_Fold']
    for metric_name in ('Accuracy', 'F1', 'Recall', 'Precision'):
        fold_metrics[metric_name].append(metrics[metric_name])

    # NOTE(review): unlike the four metrics above, AUC and Specificity are
    # overwritten on every fold instead of appended. Kept as-is because the
    # consumer (print_metrics) is not visible here - confirm which is intended.
    fold_metrics['AUC'] = metrics['AUC']
    fold_metrics['Specificity'] = metrics['Specificity']
    #####################################################################################
    #####################################################################################
    
                                       

In [None]:
def get_trained_dnn_model(df, arousal_signal):
    """Optionally run the k-fold iterations, then build the final model.

    When the module-level ``run_k_fold`` is set, the unique ``Study_Subject``
    values are split with ``KFold(n_splits=k_fold_n_splits)`` and
    ``train_dnn_model`` is called once per fold, followed by
    ``print_metrics(model_metrics)``. Afterwards the final model is obtained
    from ``get_dnn_model_and_metrics`` and its metrics are stored under
    ``model_metrics[arousal_signal]['Train']``.

    Returns:
        The final model.
    """
    #############################################################
    #                       K-Fold Models
    #############################################################
    if run_k_fold:
        study_subjects = np.array(df.Study_Subject.unique())
        splitter = KFold(n_splits=k_fold_n_splits)

        # NOTE(review): the subjects handed to train_dnn_model are those at the
        # second index array yielded by KFold.split - confirm this is intended.
        for fold_no, (_, fold_positions) in enumerate(splitter.split(study_subjects), start=1):
            print('Running ' + str(k_fold_n_splits) + '-fold iteration: ' + str(fold_no) + '\n')
            train_dnn_model(df,
                            model_features,
                            arousal_signal,
                            model_train_method,
                            study_subjects[fold_positions])

        print_metrics(model_metrics)

    #############################################################
    #          Final Model to Run on Test Study
    #############################################################
    model, metrics = get_dnn_model_and_metrics(df, arousal_signal)

    train_metrics = model_metrics[arousal_signal]['Train']
    for metric_name in ('Accuracy', 'AUC', 'F1', 'Precision', 'Recall', 'Specificity'):
        train_metrics[metric_name] = metrics[metric_name]

    return model




def test_dnn_model(model, test_study, test_df, arousal_signal):
    """Score ``model`` on ``test_df`` and record the metrics for ``test_study``.

    Args:
        model: model exposing ``evaluate`` and ``predict``.
        test_study: study name; its upper-cased form is the key under
            ``model_metrics[arousal_signal]['Test']``.
        test_df: frame with ``dnn_features`` and the
            ``<arousal_signal>_Arousal_Mode_Cat`` column. It is modified in
            place: an ``Arousal_Mode`` column is added.
        arousal_signal: label-column prefix and key into ``model_metrics``.

    Side effects:
        Prints the label / prediction counts and updates ``model_metrics``.
    """
    # Copy the selected label column under the generic name used below.
    test_df['Arousal_Mode'] = test_df[arousal_signal + '_Arousal_Mode_Cat']
    
    X_test = test_df[dnn_features]
    y_test = test_df['Arousal_Mode']
    
    
    ######################################################################
    # Metrics as reported by Keras; they are not used in the visible part of
    # this function. The local f1_score shadows sklearn's f1_score here.
    loss, accuracy, auc, f1_score, precision, recall = model.evaluate(model_input_data(X_test), y_test, verbose=0)
    ######################################################################
    
    
    ######################################################################
    # Threshold the predicted probabilities into 0/1 labels.
    y_prob = model.predict(model_input_data(X_test))
    y_pred = np.where(y_prob > prediction_threshold, 1,0).flatten().tolist()
    
    Accuracy, AUC, F1, Recall, Precision, Specificity = get_metrics(model, X_test, y_test, y_pred, y_prob.flatten().tolist())
    
    print('Test Data - True --> ')
    print(collections.Counter(y_test))
    
    print('\nTest Data - Prediction --> ')
    print(collections.Counter(y_pred))
    ######################################################################
    
    
    # Store the get_metrics() results under the upper-cased study name.
    model_metrics[arousal_signal]['Test'][test_study.upper()]['Accuracy'] = Accuracy
    model_metrics[arousal_signal]['Test'][test_study.upper()]['AUC'] = AUC
    model_metrics[arousal_signal]['Test'][test_study.upper()]['F1'] = F1
    model_metrics[arousal_signal]['Test'][test_study.upper()]['Recall'] = Recall
    model_metrics[arousal_signal]['Test'][test_study.upper()]['Precision'] = Precision
    model_metrics[arousal_signal]['Test'][test_study.upper()]['Specificity'] = Specificity
    
    # if print_all: print('Accuracy: %.2f \nAUC: %.2f \nF1: %.2f \nRecall: %.2f \nPrecision: %.2f \nSpecificity: %.2f\n' % (accuracy, auc, f1_score, recall, precision, specificity))
    