In [20]:
import sys
import os
# Make project modules importable from this notebook: register the directory two
# levels above the working directory first, then the one directly above it,
# skipping any entry that is already on sys.path.
for _relative_dir in (os.path.join(os.pardir, os.pardir), os.path.join('..')):
    module_path = os.path.abspath(_relative_dir)
    if module_path not in sys.path:
        sys.path.append(module_path)
import pandas as pd
#import seaborn as sns
import platform
from sleep_stage_config import Config
from utilities.utils import *
from sklearn.preprocessing import Normalizer
#sns.set(style='whitegrid', rc={'axes.facecolor': '#EFF2F7'})
import hrvanalysis as hrvana
from datetime import datetime
from pathlib import Path
from tqdm import tqdm

## This tutorial explains the feature extraction pipeline for activity counts and heart rate variability in detail
This tutorial focuses only on the feature extraction pipeline for each modality. It does not align the actigraphy data and the RR-interval data by sleep epoch. You can find detailed information about aligning activity counts and RR intervals in `align_actigraphy_rri.py`.

In [38]:
# --- Inputs: data downloaded from MESA (edit these to match your machine) ---
# Directory holding the actigraphy files.
acc_path = r"\Dataset\MESA\actigraphy"
# Directory holding the R-point files.
hr_path = r"\Dataset\MESA\annotations-rpoints"
# CSV file with the MESA admin data.
admin_file_path = r"\Dataset\MESA\mesa-sleep-dataset-0.3.0.csv"

# --- Outputs: directories the extracted feature tables are written to ---
acc_feature_output_path = r"\tmp\sleep\act_features"
hrv_feature_output_path = r"\tmp\sleep\hrv_features"

# Read by the actigraphy loop: when True, the feature columns are standardised
# before the per-subject CSV is saved.
standarize_feature = True

In [22]:
# Load the MESA admin table, then collect every distinct subject id found in
# its 'mesaid' column; both loops below iterate over this array.
admin_df = pd.read_csv(admin_file_path)
total_subjects_list = admin_df.loc[:, 'mesaid'].unique()

In [137]:
# os.listdir returns entries in arbitrary order; sort them so positional lookups
# such as all_hr_files[0] show the same file on every run and platform.
all_acc_files = sorted(os.listdir(acc_path))
all_hr_files = sorted(os.listdir(hr_path))

In [138]:
# Number of entries found in the R-point directory.
len(all_hr_files)

1966

### Part 1: The feature extraction pipeline for heart rate variability in detail
Note: 
In our paper, we used only a single window length of 30 s to extract the RR-interval features. This choice diminishes the physiological meaning of these features but yields a better classification outcome. 


In [39]:
# Window length for HRV feature extraction, counted in 30 s sleep epochs.
# 0 means every sleep epoch is processed on its own. A non-zero value makes the
# extraction loop pool the beats of epochs
# [epoch - floor(hrv_win / 2), epoch + floor(hrv_win / 2)) around each epoch.
hrv_win = 0

In [140]:
# Peek at one entry to see how the R-point files are named.
all_hr_files[0]

'mesa-sleep-0001-rpoint.csv'

In [143]:
# Part 1 driver: for every subject in the admin table that has an R-point file,
# clean the RR-interval series and write one row of HRV features per sleep epoch
# to "<mesa_id>_hrv_features.csv" inside hrv_feature_output_path.

# Epochs with fewer beats than this are dropped before feature extraction.
# NOTE(review): the earlier comment said "at least 4 valid beats" while the code has
# always tested `< 3`; the code's threshold is kept -- confirm which one is intended.
min_beats_per_epoch = 3

# (extractor, message printed when it raises). A failing extractor only leaves its
# own columns out of that epoch's row; the remaining extractors still run.
hrv_extractors = (
    (hrvana.get_time_domain_features, "processed time domain features with error message: {}"),
    (hrvana.get_frequency_domain_features, "processed frequency domain features with error message: {}"),
    (hrvana.get_poincare_plot_features, "processed poincare features: {} with error message"),
    (hrvana.get_csi_cvi_features, "processed csi cvi domain features: {} with error message"),
    (hrvana.get_geometrical_features, "processed geometrical features: {} with error message"),
)

# Create the output directory up front so to_csv cannot fail on a missing folder.
os.makedirs(hrv_feature_output_path, exist_ok=True)

# only extract the HRV features if the subject is in the valid subject list
for PID in tqdm(total_subjects_list):
    mesa_id = "%04d" % PID
    # First file whose name contains the zero-padded id; None when the subject has no
    # R-point file. (Joining all matches and looking the result up again raised
    # ValueError as soon as more than one file name matched.)
    hr_file_name = next((s for s in all_hr_files if mesa_id in s), None)
    if hr_file_name is None:
        continue
    hr_df = pd.read_csv(os.path.join(hr_path, hr_file_name))

    # .copy() gives an independent frame, so the column assignments below are not
    # chained assignments on a slice of the frame that was read from disk.
    hr_df = hr_df[hr_df['TPoint'] > 0].copy()
    # RR interval in milliseconds = difference between consecutive beat times * 1000
    # (assumes the 'seconds' column is in seconds -- TODO confirm against the MESA docs).
    raw_rri = hr_df['seconds'].diff() * 1000
    # The first diff is always NaN; fill with the mean so the series has no gaps.
    # Assigning the filled Series replaces fillna(inplace=True) on a column selection,
    # which does not update the frame under pandas copy-on-write.
    hr_df['RR Intervals'] = raw_rri.fillna(raw_rri.mean())

    clean_rri = hr_df['RR Intervals'].values
    clean_rri = hrvana.remove_outliers(rr_intervals=clean_rri, low_rri=300, high_rri=2000)
    clean_rri = hrvana.interpolate_nan_values(rr_intervals=clean_rri, interpolation_method="linear")
    clean_rri = hrvana.remove_ectopic_beats(rr_intervals=clean_rri, method="malik")
    clean_rri = hrvana.interpolate_nan_values(rr_intervals=clean_rri)
    hr_df["RR Intervals"] = clean_rri
    # calculate the Heart Rate (60000 ms per minute / RR interval in ms), rounded
    hr_df['HR'] = np.round((60000.0 / hr_df['RR Intervals']), 0)

    # Drop epochs that hold fewer than min_beats_per_epoch beats. Working on the
    # value_counts() Series directly avoids depending on the column names produced
    # by reset_index(), which changed in pandas 2.0.
    beats_per_epoch = hr_df['epoch'].value_counts()
    sparse_epochs = beats_per_epoch[beats_per_epoch < min_beats_per_epoch].index
    hr_df = hr_df[~hr_df['epoch'].isin(sparse_epochs)]

    feature_list = []
    # go through all sleep epochs and extract HRV features from them;
    # sort=False keeps the epochs in order of first appearance, and a single groupby
    # replaces one boolean mask over the whole frame per epoch.
    for hr_epoch_idx, epoch_df in hr_df.groupby('epoch', sort=False):
        # in MESA r-points files, the stage is annotated by the sleep experts
        gt_label = epoch_df["stage"].values[0]
        if hrv_win != 0:
            # if you want get the features from a 5 min window, please set hrv_win = 10,
            # as per sleep epoch is 30s. np.arange is half-open, so the window covers
            # epochs hr_epoch_idx - offset .. hr_epoch_idx + offset - 1.
            offset = int(np.floor(hrv_win / 2))
            window_epochs = np.arange(hr_epoch_idx - offset, hr_epoch_idx + offset)
            tmp_hr_df = hr_df[hr_df['epoch'].isin(window_epochs)]
        else:
            tmp_hr_df = epoch_df

        # calculate the HRV features for each epoch
        rr_epoch = tmp_hr_df['RR Intervals'].values
        all_hr_features = {}
        for extract_features, failure_message in hrv_extractors:
            try:
                all_hr_features.update(extract_features(rr_epoch))
            except Exception as ee:
                print(failure_message.format(str(ee)))

        all_hr_features.update({'stages': gt_label,
                                'mesaid': str(mesa_id),
                                'epoch': hr_epoch_idx})
        feature_list.append(all_hr_features)

    hrv_feature_df = pd.DataFrame(feature_list)
    hrv_feature_df.to_csv(os.path.join(hrv_feature_output_path, (mesa_id + '_hrv_features.csv')), index=False)


  0%|                                                                                         | 0/2237 [00:00<?, ?it/s]

218 outlier(s) have been deleted.
The outlier(s) value(s) are : [250.0, 296.875, 261.71900000008463, 242.18800000016927, 222.65599999991537, 281.25, 8207.030999999915, 5132.813000000169, 2203.125, 25617.18699999983, 22882.81200000074, 51914.06200000074, 12632.81299999926, 4910.156000000825, 4054.6869999998307, 4972.655999999915, 5054.68799999926, 24996.094000000085, 14804.68799999926, 2371.0940000000846, 3902.3430000006556, 80726.56300000017, 5367.186999999831, 5613.280999999915, 4148.436999999831, 9894.530999999915, 8386.718000000656, 28371.094000000085, 7710.938000000169, 3394.531000000825, 3476.5630000001693, 7765.625, 3531.25, 3484.375, 9613.281000000825, 2386.718999999175, 5023.438000000169, 8996.093000000656, 4320.313000000169, 7695.313000000169, 14210.93699999983, 2585.9380000001693, 2574.2190000000846, 4304.688000000169, 24964.844000000085, 4046.875, 5019.530999999915, 5808.592999999746, 29187.5, 14605.469000000085, 3300.7809999999154, 3324.2190000000846, 2542.967999999746, 585

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  0%|                                                                               | 1/2237 [00:02<1:37:39,  2.62s/it]

50 outlier(s) have been deleted.
The outlier(s) value(s) are : [5265.625, 19558.59375, 38839.84375, 2738.28125, 16390.625, 12562.5, 2781.25, 9160.15625, 8195.3125, 16617.1875, 3625.0, 2218.75, 11144.53125, 2804.6875, 29746.09375, 13773.4375, 18554.6875, 2996.09375, 11019.53125, 2996.09375, 35367.1875, 6328.125, 5078.125, 29488.28125, 9812.5, 3707.03125, 6542.96875, 8152.34375, 2652.34375, 8519.53125, 38812.5, 27984.375, 12734.375, 2835.9375, 6531.25, 3910.15625, 3703.125, 15023.4375, 27093.75, 2695.3125, 269.53125, 2277.34375, 6320.3125, 238.28125, 13289.0625, 17351.5625, 242.1875, 2238.28125, 164.0625, 5867.1875]
101 ectopic beat(s) have been deleted with malik rule.


  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  0%|                                                                               | 2/2237 [00:04<1:32:24,  2.48s/it]

37 outlier(s) have been deleted.
The outlier(s) value(s) are : [4789.0625, 2199.21875, 2121.09375, 2246.09375, 2203.125, 2242.1875, 4957.03125, 2015.625, 2113.28125, 2078.125, 2566.40625, 2757.8125, 5085.9375, 9734.375, 7281.25, 11281.25, 2160.15625, 14914.0625, 7132.8125, 4535.15625, 2148.4375, 3414.0625, 6523.4375, 6425.78125, 6921.875, 2140.625, 2996.09375, 10054.6875, 2476.5625, 5738.28125, 7371.09375, 4332.03125, 2121.09375, 2652.34375, 3925.78125, 3042.96875, 3292.96875]
140 ectopic beat(s) have been deleted with malik rule.


  0%|                                                                               | 3/2237 [00:06<1:29:01,  2.39s/it]

37 outlier(s) have been deleted.
The outlier(s) value(s) are : [4789.0625, 2199.21875, 2121.09375, 2246.09375, 2203.125, 2242.1875, 4957.03125, 2015.625, 2113.28125, 2078.125, 2566.40625, 2757.8125, 5085.9375, 9734.375, 7281.25, 11281.25, 2160.15625, 14914.0625, 7132.8125, 4535.15625, 2148.4375, 3414.0625, 6523.4375, 6425.78125, 6921.875, 2140.625, 2996.09375, 10054.6875, 2476.5625, 5738.28125, 7371.09375, 4332.03125, 2121.09375, 2652.34375, 3925.78125, 3042.96875, 3292.96875]
140 ectopic beat(s) have been deleted with malik rule.


  0%|▏                                                                              | 4/2237 [00:09<1:26:42,  2.33s/it]

106 outlier(s) have been deleted.
The outlier(s) value(s) are : [2183.59375, 222.65625, 2363.28125, 4300.78125, 2585.9375, 8941.40625, 2378.90625, 261.71875, 5343.75, 2128.90625, 13492.1875, 21816.40625, 11222.65625, 20437.5, 11453.125, 26308.59375, 5902.34375, 51941.40625, 16371.09375, 149187.5, 22246.09375, 44660.15625, 10996.09375, 5031.25, 12664.0625, 29093.75, 7886.71875, 85648.4375, 3136.71875, 56609.375, 6222.65625, 24910.15625, 6242.1875, 12496.09375, 3136.71875, 25894.53125, 10683.59375, 5843.75, 15437.5, 7082.03125, 63738.28125, 12078.125, 41585.9375, 15378.90625, 4113.28125, 62062.5, 8429.6875, 10832.03125, 30046.875, 6070.3125, 5875.0, 16714.84375, 57527.34375, 53933.59375, 4210.9375, 13472.65625, 16781.25, 11355.46875, 113640.625, 45152.34375, 12882.8125, 12160.15625, 12500.0, 5125.0, 2839.84375, 35765.625, 592632.8125, 10718.75, 10585.9375, 5421.875, 104847.65625, 125875.0, 41292.96875, 9250.0, 169292.96875, 4175.78125, 10585.9375, 120164.0625, 10019.53125, 16886.71875, 7

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  0%|▏                                                                            

processed frequency domain features with error message: arange: cannot compute length


  0%|▏                                                                              | 5/2237 [00:12<1:30:19,  2.43s/it]

41 outlier(s) have been deleted.
The outlier(s) value(s) are : [289.0625, 2101.5625, 2203.125, 2179.6875, 207.03125, 2058.59375, 3015.625, 2992.1875, 3992.1875, 6339.84375, 3445.3125, 2613.28125, 4707.03125, 2261.71875, 2394.53125, 3035.15625, 2378.90625, 5878.90625, 2238.28125, 3222.65625, 230.46875, 3484.375, 2132.8125, 3964.84375, 156433.59375, 2511.71875, 7121.09375, 2320.3125, 2660.15625, 5976.5625, 3441.40625, 4902.34375, 11070.3125, 7269.53125, 6460.9375, 5972.65625, 12511.71875, 5824.21875, 5808.59375, 7914.0625, 2046.875]
186 ectopic beat(s) have been deleted with malik rule.





KeyboardInterrupt: 

### Part 2: The feature extraction pipeline for activity counts in detail
Note: 

For the actigraphy-based sliding-window method, the `get_statistic_feature` function calculates statistical features using two window alignments: a centred window and a backwards-looking window. The dataframe passed in as the function's argument is expanded to include the calculated features. In this tutorial, we show an example that uses 20 sleep epochs as the window length. For MESA actigraphy, the 370 calculated features are appended after the column *daybynoon*, and the resulting actigraphy features are saved in `acc_feature_output_path`.


In [25]:
# Part 2 driver: for every subject in the admin table that has an actigraphy file,
# compute the sliding-window activity features and write them to
# "<mesa_id>_act_features.csv" inside acc_feature_output_path.

# Columns that are never passed to the standardiser.
non_scaled_columns = ['two_stages', 'seconds', 'activity', 'interval', 'wake', 'linetime', 'mesaid', 'stages', 'line']

# Create the output directory up front so to_csv cannot fail on a missing folder.
os.makedirs(acc_feature_output_path, exist_ok=True)

for PID in tqdm(total_subjects_list):
    mesa_id = "%04d" % PID
    print('*' * 100)
    print("Processing subject %s dataset" % mesa_id)
    # First file whose name contains the zero-padded id; None when the subject has no
    # actigraphy file. (Joining all matches and looking the result up again raised
    # ValueError as soon as more than one file name matched.)
    acc_file_name = next((s for s in all_acc_files if mesa_id in s), None)
    if acc_file_name is None:
        continue
    acc_df = pd.read_csv(os.path.join(acc_path, acc_file_name))
    # filter ACC, as per MESA website recommended. .copy() gives an independent frame
    # so the columns added below are not chained assignments on a slice.
    acc_df = acc_df[acc_df['interval'] != 'EXCLUDED'].copy()

    # check whether the activity is empty *before* the expensive feature computation:
    # an empty frame yields a 1-D array here, a populated one an (n, 2) array.
    list_size_chk = np.array(acc_df[['marker', 'activity']].values.tolist())
    if len(list_size_chk.shape) < 2:
        print(
            "File {f_name} doesn't meet dimension requirement, it's size is {wrong_dim}".format(
                f_name=acc_file_name, wrong_dim=list_size_chk.shape)
        )
        continue

    # Seconds component of (linetime - today's midnight), computed on a local Series so
    # no temporary columns have to be added to the frame and dropped again.
    line_timestamp = pd.to_datetime(acc_df['linetime'])
    acc_df['seconds'] = (line_timestamp - pd.to_datetime('00:00:00')).dt.seconds

    # Adds the sliding-window statistics as new columns of acc_df (per the tutorial
    # text above this cell); the returned value was never used, so it is not kept.
    get_statistic_feature(acc_df, column_name="activity", windows_size=20)

    # fill the missing data with their median value which is resistant to outliers.
    # numeric_only=True is required on pandas >= 2.0, where median() raises on the
    # string columns instead of silently skipping them.
    acc_df = acc_df.fillna(acc_df.median(numeric_only=True))

    # standardise and normalise the df
    feature_list = acc_df.columns.to_list()
    std_feature = [x for x in feature_list if x not in non_scaled_columns]
    if standarize_feature:
        # NOTE(review): the return value is ignored, so this presumably transforms
        # acc_df in place -- confirm in utilities.utils.
        standardize_df_given_feature(acc_df, std_feature, df_name='acc_df', simple_method=False)
    acc_df.to_csv(os.path.join(acc_feature_output_path, (mesa_id + '_act_features.csv')), index=False)

  0%|                                                                                         | 0/2237 [00:00<?, ?it/s]

****************************************************************************************************
Processing subject 0001 dataset
pre-processing dataset frame using comprehensive method
quality check on acc_df for column name: offwrist
quality check on acc_df for column name: marker
quality check on acc_df for column name: whitelight
quality check on acc_df for column name: redlight
quality check on acc_df for column name: greenlight
quality check on acc_df for column name: bluelight
quality check on acc_df for column name: dayofweek
quality check on acc_df for column name: daybymidnight
quality check on acc_df for column name: daybynoon
quality check on acc_df for column name: _mean_1
quality check on acc_df for column name: _mean_centered_1
quality check on acc_df for column name: _median_1
quality check on acc_df for column name: _median_centered_1
quality check on acc_df for column name: _std_1
quality check on acc_df for column name: _std_centered_1
quality check on acc_df for 

quality check on acc_df for column name: _std_centered_16
quality check on acc_df for column name: _max_16
quality check on acc_df for column name: _max_centered_16
quality check on acc_df for column name: _min_16
quality check on acc_df for column name: _min_centered_16
quality check on acc_df for column name: _var_16
quality check on acc_df for column name: _var_centered_16
quality check on acc_df for column name: _nat_16
quality check on acc_df for column name: _nat_centered_16
quality check on acc_df for column name: _anyact_16
quality check on acc_df for column name: _anyact_centered_16
quality check on acc_df for column name: _skew_16
quality check on acc_df for column name: _skew_centered_16
quality check on acc_df for column name: _kurt_16
quality check on acc_df for column name: _kurt_centered_16
quality check on acc_df for column name: _mean_17
quality check on acc_df for column name: _mean_centered_17
quality check on acc_df for column name: _median_17
quality check on acc_d

  0%|                                                                              | 1/2237 [00:17<10:46:33, 17.35s/it]

****************************************************************************************************
Processing subject 0002 dataset
pre-processing dataset frame using comprehensive method
quality check on acc_df for column name: offwrist
quality check on acc_df for column name: marker
quality check on acc_df for column name: whitelight
quality check on acc_df for column name: redlight
quality check on acc_df for column name: greenlight
quality check on acc_df for column name: bluelight
quality check on acc_df for column name: dayofweek
quality check on acc_df for column name: daybymidnight
quality check on acc_df for column name: daybynoon
quality check on acc_df for column name: _mean_1
quality check on acc_df for column name: _mean_centered_1
quality check on acc_df for column name: _median_1
quality check on acc_df for column name: _median_centered_1
quality check on acc_df for column name: _std_1
quality check on acc_df for column name: _std_centered_1
quality check on acc_df for 

quality check on acc_df for column name: _std_10
quality check on acc_df for column name: _std_centered_10
quality check on acc_df for column name: _max_10
quality check on acc_df for column name: _max_centered_10
quality check on acc_df for column name: _min_10
quality check on acc_df for column name: _min_centered_10
quality check on acc_df for column name: _var_10
quality check on acc_df for column name: _var_centered_10
quality check on acc_df for column name: _nat_10
quality check on acc_df for column name: _nat_centered_10
quality check on acc_df for column name: _anyact_10
quality check on acc_df for column name: _anyact_centered_10
quality check on acc_df for column name: _skew_10
quality check on acc_df for column name: _skew_centered_10
quality check on acc_df for column name: _kurt_10
quality check on acc_df for column name: _kurt_centered_10
quality check on acc_df for column name: _mean_11
quality check on acc_df for column name: _mean_centered_11
quality check on acc_df f

quality check on acc_df for column name: _anyact_19
quality check on acc_df for column name: _anyact_centered_19
quality check on acc_df for column name: _skew_19
quality check on acc_df for column name: _skew_centered_19
quality check on acc_df for column name: _kurt_19
quality check on acc_df for column name: _kurt_centered_19
quality check on acc_df for column name: _Act
quality check on acc_df for column name: _LocAct
 Not given scaler start training scaler now!
start transform dataset frame :acc_df


  0%|                                                                               | 2/2237 [00:28<9:39:23, 15.55s/it]

****************************************************************************************************
Processing subject 0006 dataset
pre-processing dataset frame using comprehensive method
quality check on acc_df for column name: offwrist
quality check on acc_df for column name: marker
quality check on acc_df for column name: whitelight
quality check on acc_df for column name: redlight
quality check on acc_df for column name: greenlight
quality check on acc_df for column name: bluelight
quality check on acc_df for column name: dayofweek
quality check on acc_df for column name: daybymidnight
quality check on acc_df for column name: daybynoon
quality check on acc_df for column name: _mean_1
quality check on acc_df for column name: _mean_centered_1
quality check on acc_df for column name: _median_1
quality check on acc_df for column name: _median_centered_1
quality check on acc_df for column name: _std_1
quality check on acc_df for column name: _std_centered_1
quality check on acc_df for 

quality check on acc_df for column name: _max_centered_10
quality check on acc_df for column name: _min_10
quality check on acc_df for column name: _min_centered_10
quality check on acc_df for column name: _var_10
quality check on acc_df for column name: _var_centered_10
quality check on acc_df for column name: _nat_10
quality check on acc_df for column name: _nat_centered_10
quality check on acc_df for column name: _anyact_10
quality check on acc_df for column name: _anyact_centered_10
quality check on acc_df for column name: _skew_10
quality check on acc_df for column name: _skew_centered_10
quality check on acc_df for column name: _kurt_10
quality check on acc_df for column name: _kurt_centered_10
quality check on acc_df for column name: _mean_11
quality check on acc_df for column name: _mean_centered_11
quality check on acc_df for column name: _median_11
quality check on acc_df for column name: _median_centered_11
quality check on acc_df for column name: _std_11
quality check on ac

quality check on acc_df for column name: _anyact_centered_19
quality check on acc_df for column name: _skew_19
quality check on acc_df for column name: _skew_centered_19
quality check on acc_df for column name: _kurt_19
quality check on acc_df for column name: _kurt_centered_19
quality check on acc_df for column name: _Act
quality check on acc_df for column name: _LocAct
 Not given scaler start training scaler now!
start transform dataset frame :acc_df


  0%|                                                                               | 3/2237 [00:39<8:46:43, 14.15s/it]

****************************************************************************************************
Processing subject 0010 dataset
pre-processing dataset frame using comprehensive method
quality check on acc_df for column name: offwrist
quality check on acc_df for column name: marker
quality check on acc_df for column name: whitelight
quality check on acc_df for column name: redlight
quality check on acc_df for column name: greenlight
quality check on acc_df for column name: bluelight
quality check on acc_df for column name: dayofweek
quality check on acc_df for column name: daybymidnight
quality check on acc_df for column name: daybynoon
quality check on acc_df for column name: _mean_1
quality check on acc_df for column name: _mean_centered_1
quality check on acc_df for column name: _median_1
quality check on acc_df for column name: _median_centered_1
quality check on acc_df for column name: _std_1
quality check on acc_df for column name: _std_centered_1
quality check on acc_df for 

quality check on acc_df for column name: _mean_10
quality check on acc_df for column name: _mean_centered_10
quality check on acc_df for column name: _median_10
quality check on acc_df for column name: _median_centered_10
quality check on acc_df for column name: _std_10
quality check on acc_df for column name: _std_centered_10
quality check on acc_df for column name: _max_10
quality check on acc_df for column name: _max_centered_10
quality check on acc_df for column name: _min_10
quality check on acc_df for column name: _min_centered_10
quality check on acc_df for column name: _var_10
quality check on acc_df for column name: _var_centered_10
quality check on acc_df for column name: _nat_10
quality check on acc_df for column name: _nat_centered_10
quality check on acc_df for column name: _anyact_10
quality check on acc_df for column name: _anyact_centered_10
quality check on acc_df for column name: _skew_10
quality check on acc_df for column name: _skew_centered_10
quality check on acc_

quality check on acc_df for column name: _mean_centered_19
quality check on acc_df for column name: _median_19
quality check on acc_df for column name: _median_centered_19
quality check on acc_df for column name: _std_19
quality check on acc_df for column name: _std_centered_19
quality check on acc_df for column name: _max_19
quality check on acc_df for column name: _max_centered_19
quality check on acc_df for column name: _min_19
quality check on acc_df for column name: _min_centered_19
quality check on acc_df for column name: _var_19
quality check on acc_df for column name: _var_centered_19
quality check on acc_df for column name: _nat_19
quality check on acc_df for column name: _nat_centered_19
quality check on acc_df for column name: _anyact_19
quality check on acc_df for column name: _anyact_centered_19
quality check on acc_df for column name: _skew_19
quality check on acc_df for column name: _skew_centered_19
quality check on acc_df for column name: _kurt_19
quality check on acc_

  0%|▏                                                                              | 4/2237 [00:50<8:15:44, 13.32s/it]

****************************************************************************************************
Processing subject 0012 dataset
pre-processing dataset frame using comprehensive method
quality check on acc_df for column name: offwrist
quality check on acc_df for column name: marker
quality check on acc_df for column name: whitelight
quality check on acc_df for column name: redlight
quality check on acc_df for column name: greenlight
quality check on acc_df for column name: bluelight
quality check on acc_df for column name: dayofweek
quality check on acc_df for column name: daybymidnight
quality check on acc_df for column name: daybynoon
quality check on acc_df for column name: _mean_1
quality check on acc_df for column name: _mean_centered_1
quality check on acc_df for column name: _median_1
quality check on acc_df for column name: _median_centered_1
quality check on acc_df for column name: _std_1
quality check on acc_df for column name: _std_centered_1
quality check on acc_df for 

quality check on acc_df for column name: _kurt_9
quality check on acc_df for column name: _kurt_centered_9
quality check on acc_df for column name: _mean_10
quality check on acc_df for column name: _mean_centered_10
quality check on acc_df for column name: _median_10
quality check on acc_df for column name: _median_centered_10
quality check on acc_df for column name: _std_10
quality check on acc_df for column name: _std_centered_10
quality check on acc_df for column name: _max_10
quality check on acc_df for column name: _max_centered_10
quality check on acc_df for column name: _min_10
quality check on acc_df for column name: _min_centered_10
quality check on acc_df for column name: _var_10
quality check on acc_df for column name: _var_centered_10
quality check on acc_df for column name: _nat_10
quality check on acc_df for column name: _nat_centered_10
quality check on acc_df for column name: _anyact_10
quality check on acc_df for column name: _anyact_centered_10
quality check on acc_df

quality check on acc_df for column name: _mean_19
quality check on acc_df for column name: _mean_centered_19
quality check on acc_df for column name: _median_19
quality check on acc_df for column name: _median_centered_19
quality check on acc_df for column name: _std_19
quality check on acc_df for column name: _std_centered_19
quality check on acc_df for column name: _max_19
quality check on acc_df for column name: _max_centered_19
quality check on acc_df for column name: _min_19
quality check on acc_df for column name: _min_centered_19
quality check on acc_df for column name: _var_19
quality check on acc_df for column name: _var_centered_19
quality check on acc_df for column name: _nat_19
quality check on acc_df for column name: _nat_centered_19
quality check on acc_df for column name: _anyact_19
quality check on acc_df for column name: _anyact_centered_19
quality check on acc_df for column name: _skew_19
quality check on acc_df for column name: _skew_centered_19
quality check on acc_

  0%|▏                                                                              | 4/2237 [01:01<9:31:09, 15.35s/it]


KeyboardInterrupt: 