In [28]:
import os

import pandas as pd
import numpy as np


%run us_common_functions.ipynb

In [29]:
# Input directory for each study, keyed by study name. The merge loop below
# writes the key into the 'Study_Name' column and prepends the directory to
# each file name, so every directory string must end with a path separator.
# NOTE(review): the *_data_dir names are not defined in this cell; presumably
# they come from the `%run us_common_functions.ipynb` above -- confirm.
file_paths = dict(
    sim1=sim1_data_dir,
    sim2=sim2_data_dir,
    tt1=tt1_data_dir,
    office_tasks=office_tasks_data_dir,
    deadline_study=deadline_study_data_dir,
)


# Files that are merged across all studies, in processing order.
#
# Currently disabled inputs (kept for reference):
#   'data_2.csv'    Merged Data --> Physiological + Psychometric + Biographic
#   'data_2.1.csv'  Merged Data + TimeElapsed column
#   'data_2.2.csv'  Merged Data --> QC Data (QC1 and Log Transformed)
#   'data_3.1'      Single window - 10 sec - aggregation + standardized scale
file_names = [
    # Resting Baseline Mean
    'rb_data.csv',
    # 1 sec resolution - labeled data
    'data_2.5.csv',
    # Single window - 10 sec - aggregation
    'data_3.csv',
]



In [30]:
# Columns shared by every data file except 'rb_data.csv' and 'data_1.csv'.
# Names are stored here with spaces; get_columns() converts them to
# underscores only for the branches that go through its final normalisation
# step ('rb_data.csv' and 'data_3.csv').
base_cols = [
    'Subject',
    'Age',

    'Effort',
    'Frustration',
    'Mental Demand',
    'Performance',
    'Physical Demand',
    'Temporal Demand',
    'NASA Total Sum'
]


def get_columns(file_name):
    """Return the list of column names to keep for ``file_name``.

    Parameters
    ----------
    file_name : str
        One of 'data_1.csv', 'data_2.csv', 'data_2.1.csv', 'data_2.2.csv',
        'data_2.3.csv', 'rb_data.csv', 'data_2.5.csv' or 'data_3.csv'.

    Returns
    -------
    list of str
        A new list on every call. For 'rb_data.csv' and 'data_3.csv' every
        space in a column name is replaced by an underscore (for example
        'Mental Demand' -> 'Mental_Demand'). All other files get the names
        exactly as listed, spaces included.
        NOTE(review): presumably this mirrors how each CSV was written
        upstream -- confirm before unifying the two conventions.

    Raises
    ------
    ValueError
        If ``file_name`` is not one of the supported names. Previously this
        surfaced as an ``UnboundLocalError`` on the final return statement.
    """
    if file_name == 'data_1.csv':
        return []

    elif file_name == 'data_2.csv':
        return base_cols + [
            'Gender',
            'Perinasal',
            'Heart',
            'Breathing']

    elif file_name in ['data_2.1.csv']:
        return base_cols + [
            'TimeElapsed',
            'Treatment',

            'Gender',
            'Perinasal',
            'Heart',
            'Breathing']

    elif file_name in ['data_2.2.csv', 'data_2.3.csv']:
        return base_cols + [
            'TimeElapsed',
            'Treatment',

            'Gender',
            'Perinasal',
            'Perinasal_Log',
            'Heart',
            'Breathing']

    elif file_name == 'rb_data.csv':
        # The only file that does not start from base_cols.
        cols = [
            'Subject',
            'Treatment',
            'PP_RB_Mean',
            'HR_RB_Mean',
            'BR_RB_Mean',
        ]

    elif file_name == 'data_2.5.csv':
        return base_cols + [
            'TimeElapsed',
            'Treatment',

            'Gender',

            'Perinasal',
            'Perinasal_Log',
            'Heart',
            'Breathing',

            'PP_Normalized',
            'HR_Normalized',
            'BR_Normalized',

            'PP_Arousal',
            'HR_Arousal',
            'BR_Arousal',

            'PP_ecdf',
            'HR_ecdf',
            'BR_ecdf',
            'PP_HR_ecdf',
            'PP_BR_ecdf',
            'HR_BR_ecdf',
            'PP_HR_BR_ecdf',

            'PP_Arousal_ecdf',
            'HR_Arousal_ecdf',
            'BR_Arousal_ecdf',
            'PP_HR_Arousal_ecdf',
            'PP_BR_Arousal_ecdf',
            'HR_BR_Arousal_ecdf',
            'PP_HR_BR_Arousal_ecdf'
        ]

    elif file_name == 'data_3.csv':
        cols = base_cols + [
            'Treatment',

            'Gender_Female',
            'Gender_Male',

            # Disabled: '<Signal>_{Mean,Median,SD,SS}_NN' variants for
            # Perinasal, Hr and Br.

            'PP_Mean',
            'PP_Median',
            'PP_SD',
            'PP_SS',

            'Hr_Mean',
            'Hr_Median',
            'Hr_SD',
            'Hr_SS',

            'Br_Mean',
            'Br_Median',
            'Br_SD',
            'Br_SS',

            # Disabled: the '*_Arousal_Mode_Hist' and '*_Arousal_Mode_ecdf'
            # columns for PP, HR, BR and their PP_HR / PP_BR / HR_BR /
            # PP_HR_BR combinations.

            # Based on ECDF
            'PP_Arousal_Mode',
            'HR_Arousal_Mode',
            'BR_Arousal_Mode',

            'PP_HR_Arousal_Mode',
            'PP_BR_Arousal_Mode',
            'HR_BR_Arousal_Mode',
            'PP_HR_BR_Arousal_Mode'
        ]

    else:
        # Fail with a message that names the offending file instead of
        # falling through to an unbound `cols`.
        raise ValueError('get_columns: unsupported file name %r' % (file_name,))

    # Only the 'rb_data.csv' and 'data_3.csv' branches reach this point.
    return [col.replace(' ', '_') for col in cols]


In [31]:
# For every configured file: read it from each study directory, keep only the
# columns selected by get_columns(), tag the rows with the study name, run the
# shared helpers, drop incomplete rows, and write the concatenation of all
# studies to all_studies_data_dir under the same file name.
# NOTE(review): get_study_subject_name, rename_cols and all_studies_data_dir
# are not defined in this notebook; presumably they come from the
# `%run us_common_functions.ipynb` cell -- confirm.

# Kept only because the original cell bound this name (to an empty frame);
# nothing in this cell reads it.
all_studies_scaled_df = pd.DataFrame()

for file_name in file_names:

    # The column selection depends only on the file, not on the study.
    selected_cols = get_columns(file_name)

    # Collect the per-study frames and concatenate once after the loop:
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every iteration.
    study_frames = []

    for study, file_path in file_paths.items():

        print(file_name, file_path)

        study_df = pd.read_csv(file_path + file_name)
        # Explicit copy so the column assignment below writes to an
        # independent frame rather than a slice of the one just read.
        study_df = study_df[selected_cols].copy()
        study_df['Study_Name'] = study
        study_df = get_study_subject_name(study_df)
        study_df = rename_cols(study_df)
        # Drop every row that has at least one missing value.
        study_df = study_df.dropna()

        study_frames.append(study_df)

    # pd.concat raises on an empty list; fall back to an empty frame so an
    # empty file_paths still produces an (empty) output file as before.
    all_studies_df = pd.concat(study_frames) if study_frames else pd.DataFrame()

    # ---------------------------------------------------------------------------
    # if file_name == 'data_3.csv':
    #     rb_df = pd.read_csv(all_studies_data_dir + 'rb_data.csv')
    #     all_studies_df = all_studies_df.merge(rb_df, on=['Subject', 'Study_Name', 'Study_Subject'], how='left')
    # ---------------------------------------------------------------------------

    # print(all_studies_df.columns)
    all_studies_df.to_csv(all_studies_data_dir + file_name, sep=',', index=False)


rb_data.csv ../../data/sim1/
rb_data.csv ../../data/sim2/
rb_data.csv ../../data/tt1/
rb_data.csv ../../data/office_tasks/
rb_data.csv ../../data/deadline_study/
data_2.5.csv ../../data/sim1/
data_2.5.csv ../../data/sim2/
data_2.5.csv ../../data/tt1/
data_2.5.csv ../../data/office_tasks/
data_2.5.csv ../../data/deadline_study/
data_3.csv ../../data/sim1/
data_3.csv ../../data/sim2/
data_3.csv ../../data/tt1/
data_3.csv ../../data/office_tasks/
data_3.csv ../../data/deadline_study/


In [32]:
# get_scaled_df(files=['data_3.csv'])