# Exclusions and Feature Extraction
Code by Kat Marton
Last edited 2/28 by Kat; written in Google Colaboratory.

Currently a work in progress (WIP). Things that I still need to do: 1) write code to exclude the last window in each block, since it is not the same size as the other windows; 2) add cov23 AND cov24 (cov24 used the EmbracePlus for remote but the E4 for the lab visit); 3) using the dataframes with invalid windows/blocks excluded, run feature extraction.

In [2]:
!pip install neurokit2

Collecting neurokit2
  Downloading neurokit2-0.2.7-py2.py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: neurokit2
Successfully installed neurokit2-0.2.7


In [3]:
import neurokit2 as nk
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from scipy.signal import find_peaks

In [4]:
# Mount Google Drive inside the Colab runtime so the data files are reachable.
from google.colab import drive
drive.mount('/content/drive/')
# Switch the working directory to the data folder; read_df builds file names
# relative to the current directory.
%cd "/content/drive/MyDrive/coding/data"

Mounted at /content/drive/
/content/drive/MyDrive/coding/data


In [5]:
def read_df (subjID):
  """Load one subject's cleaned lab recording from the working directory.

  The file "<subjID>_lab_filtered_cleaned.csv" is read with pandas, its
  "Block" column is cast to int, and a constant "SubjID" column holding
  subjID is appended.

  Returns the resulting DataFrame.
  """
  csv_name = "".join((subjID, "_lab_filtered_cleaned.csv"))
  recording = pd.read_csv(csv_name)

  # Cast Block to int so every downstream groupby/label sees integer blocks.
  recording = recording.astype({"Block": int})
  recording["SubjID"] = subjID

  return recording

In [79]:
# Load the cleaned lab recording of every subject, in the same order as before.
cov4, cov7, cov8, cov13, cov14, cov20 = (
    read_df(subj) for subj in ("Cov4", "Cov7", "Cov8", "Cov13", "Cov14", "Cov20")
)

cov19, cov22 = (read_df(subj) for subj in ("Cov19", "Cov22"))

In [24]:
#grouping by block
def windows_by_block(df, sampling_rate=64):
  """Label every sample with its 60 s, 30 s and 10 s window inside its block.

  Adds four columns to df (df is modified in place and also returned):
    block_index    1-based position of the sample within its "Block"
    window_num_60  "<Block>.<k>" where k is the 0-based 60 s window number
    window_num_30  same for 30 s windows
    window_num_10  same for 10 s windows

  sampling_rate is the number of samples per second; the default of 64
  reproduces the previously hard-coded window lengths (3840/1920/640).

  The window number is computed from the 0-based sample position, so the
  first window of each block holds a full window of samples. Dividing the
  1-based block_index directly made the first window one sample short and
  shifted every later boundary by one sample.
  """
  df["block_index"] = df.groupby(["Block"]).cumcount() + 1

  block_label = df["Block"].astype(str)
  zero_based_position = df["block_index"] - 1

  for seconds in (60, 30, 10):
    samples_per_window = int(round(seconds * sampling_rate))
    window_number = (zero_based_position // samples_per_window).astype(int)
    df["window_num_" + str(seconds)] = block_label + "." + window_number.astype(str)

  return df

In [80]:
# Add the block/window labels to every subject's dataframe.
cov4a, cov7a, cov8a, cov13a, cov14a, cov20a = map(
    windows_by_block, (cov4, cov7, cov8, cov13, cov14, cov20)
)

cov19a, cov22a = map(windows_by_block, (cov19, cov22))

In [27]:
#find_peaks in scipy
#optional parameters can be passed to the find_peaks function to adjust the minimum height, distance and threshold
#distance here is defined as 26 samples, which comes out to roughly 0.4s, as in Campanella_2023

def scipy_peaks(df, height=0, distance=26):
  """Mark local maxima of the "padasip" signal in a 0/1 "Peaks" column.

  df:       DataFrame with a numeric "padasip" column; it is modified in
            place and also returned.
  height:   minimum peak height passed to scipy.signal.find_peaks.
  distance: minimum number of samples between neighbouring peaks passed to
            find_peaks (default 26, the value used so far).

  find_peaks returns positions (0..len-1), not index labels, so the flags
  are written positionally. Writing them with .loc only worked when the
  dataframe index happened to be the default 0..n-1 range.
  """
  peak_positions, _ = find_peaks(df["padasip"].to_numpy(), height=height, distance=distance)

  peak_flags = np.zeros(len(df), dtype="int64")
  peak_flags[peak_positions] = 1
  df["Peaks"] = peak_flags

  return df

In [81]:
# Flag the detected peaks in every subject's dataframe.
cov4b, cov7b, cov8b, cov13b, cov14b, cov20b = map(
    scipy_peaks, (cov4a, cov7a, cov8a, cov13a, cov14a, cov20a)
)

cov19b, cov22b = map(scipy_peaks, (cov19a, cov22a))

In [29]:
#Calculate peak to peak intervals, i.e. heartbeats. Check if the interval is shorter than 500ms or longer than 1200ms.
#If it is, mark the interval for exclusion
def find_exclusions(df, min_interval_ms=500, max_interval_ms=1200, max_invalid_fraction=0.15):
  """Flag windows in which too many peak-to-peak intervals are implausible.

  df must contain "timestamp", "Peaks", "Block", "SubjID" and the three
  window label columns "window_num_60", "window_num_30", "window_num_10".
  The "timestamp" column of df is converted to datetime in place.

  An interval between consecutive peaks is invalid when it is shorter than
  min_interval_ms or longer than max_interval_ms. A window is marked
  exclude=True when its fraction of invalid intervals is greater than
  max_invalid_fraction. The defaults are the values used so far.

  Prints the overall and per-block percentage of excluded windows for each
  window length and returns a tuple (exdf_60, exdf_30, exdf_10). Each table
  is indexed by (Block, window label) and has the columns "size" (fraction
  of invalid intervals) and "exclude".
  """
  df["timestamp"] = pd.to_datetime(df["timestamp"])

  # Keep only the rows flagged as peaks; work on a copy so df gains no columns.
  beats = df.loc[df["Peaks"] != 0].copy()

  # NOTE(review): the interval is taken across the whole recording, so the
  # first peak of a block is compared with the last peak of the previous
  # block. If blocks are not contiguous in time that interval is flagged as
  # too long - confirm whether the diff should be taken per block.
  beats["diff"] = beats["timestamp"].diff()
  too_short = beats["diff"] < pd.Timedelta(milliseconds=min_interval_ms)
  too_long = beats["diff"] > pd.Timedelta(milliseconds=max_interval_ms)
  beats["size"] = too_short | too_long

  # Positional lookup: the index of df need not contain the label 0.
  subj_id = str(df["SubjID"].iloc[0])

  tables = []
  for seconds in (60, 30, 10):
    window_col = "window_num_" + str(seconds)

    table = beats.groupby(["Block", window_col])["size"].mean().to_frame()
    table["exclude"] = table["size"] > max_invalid_fraction

    #descriptive stats of the windows
    print(subj_id + "'s percentage of Total Excluded " + str(seconds) + "s Windows: " + str(table["exclude"].mean()*100))
    print("Percentage of Excluded " + str(seconds) + "s Windows by Block: ")
    print(table.groupby(["Block"])["exclude"].mean()*100)

    tables.append(table)

  return tuple(tables)

In [66]:
#this function excludes both blocks and windows. Blocks are excluded if more than 20% of the windows in a given block are invalid.
# Windows are excluded if more than 15% of the intervals are invalid, as calculated in the find_exclusions function

#where df is the original dataframe, e.g. cov4b, and exclusions is the tuple of three per-window exclusion tables (60s, 30s, 10s) returned by find_exclusions, e.g. cov4c
def _drop_invalid_windows_and_blocks(df, window_table, window_col, max_block_fraction):
  """Return the rows of df outside invalid windows and outside invalid blocks."""
  flags = window_table.reset_index()
  is_excluded = flags["exclude"].astype(bool)

  # Window labels are "<Block>.<k>" strings, so they are unique across blocks.
  bad_windows = flags.loc[is_excluded, window_col]

  # A block is invalid when its share of excluded windows exceeds the limit.
  excluded_share = is_excluded.groupby(flags["Block"]).mean()
  bad_blocks = excluded_share.index[excluded_share > max_block_fraction]

  keep = ~df[window_col].isin(bad_windows) & ~df["Block"].isin(bad_blocks)
  return df[keep]


def exclude_blocks(df, exclusions, max_invalid_window_fraction=0.2):
  """Remove invalid windows and invalid blocks for each window length.

  df:         dataframe with "Block", "window_num_60", "window_num_30" and
              "window_num_10" columns (it is not modified).
  exclusions: the (60 s, 30 s, 10 s) tables returned by find_exclusions,
              each indexed by (Block, window label) with a boolean
              "exclude" column.
  max_invalid_window_fraction: a block is dropped when more than this
              fraction of its windows is excluded (default 0.2, the value
              used so far).

  Returns (df_60, df_30, df_10), one filtered dataframe per window length.

  Both filters are applied to the same result. Previously the block-level
  step started again from the unfiltered df, which discarded the
  window-level exclusion, so invalid windows in retained blocks were kept.
  """
  window_columns = ("window_num_60", "window_num_30", "window_num_10")

  df_60, df_30, df_10 = (
      _drop_invalid_windows_and_blocks(df, exclusions[position], window_col, max_invalid_window_fraction)
      for position, window_col in enumerate(window_columns)
  )

  return (df_60, df_30, df_10)

In [82]:
# Print the exclusion statistics for Cov22 and keep the returned tuple of
# per-window tables (60s, 30s, 10s).
cov22c = find_exclusions(cov22b)

Cov22's percentage of Total Excluded 60s Windows: 45.94594594594595
Percentage of Excluded 60s Windows by Block: 
Block
0    33.333333
1    66.666667
2    40.000000
3    40.000000
4    50.000000
5    75.000000
6    20.000000
Name: exclude, dtype: float64
Cov22's percentage of Total Excluded 30s Windows: 43.47826086956522
Percentage of Excluded 30s Windows by Block: 
Block
0    27.272727
1    54.545455
2    44.444444
3    40.000000
4    54.545455
5    62.500000
6    22.222222
Name: exclude, dtype: float64
Cov22's percentage of Total Excluded 10s Windows: 52.52525252525253
Percentage of Excluded 10s Windows by Block: 
Block
0    50.000000
1    59.375000
2    51.851852
3    53.571429
4    58.064516
5    60.869565
6    32.000000
Name: exclude, dtype: float64


In [None]:
def get_features(x, sampling_rate=64):
  """Compute NeuroKit2 time-domain HRV features for one group of samples.

  x:             peak information handed to nk.hrv_time; features_by_windows
                 passes the "Peaks" column of one group.
  sampling_rate: samples per second of the recording. Defaults to 64, the
                 value that was previously hard-coded, so existing calls
                 (including groupby.apply(get_features)) behave as before.

  Returns the result of nk.hrv_time unchanged.
  """
  return nk.hrv_time(x, sampling_rate=sampling_rate)

In [None]:
#grouping by block
def features_by_windows(df, window_size):
  """Run get_features on the "Peaks" column of every window.

  df:          dataframe with "Block", "SubjID", "Peaks" and the window
               label column named by window_size.
  window_size: name of the window label column to group on, e.g.
               "window_num_60".

  Returns the result of applying get_features to each
  (Block, window, SubjID) group.
  """
  group_keys = ["Block", window_size, "SubjID"]
  peaks_per_window = df.groupby(group_keys)["Peaks"]
  return peaks_per_window.apply(get_features)

In [None]:
# HRV features per 60s window for Cov4.
# NOTE(review): cov4b has not been passed through exclude_blocks, so windows
# flagged as invalid are still part of this output.
features_by_windows(cov4b, "window_num_60")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,HRV_MeanNN,HRV_SDNN,HRV_SDANN1,HRV_SDNNI1,HRV_SDANN2,HRV_SDNNI2,HRV_SDANN5,HRV_SDNNI5,HRV_RMSSD,HRV_SDSD,...,HRV_IQRNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_Prc80NN,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN
Block,window_num_60,SubjID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0,0,Cov4,0,709.821429,121.76455,,,,,,,169.123304,170.029521,...,125.0,0.719975,640.625,781.25,66.666667,80.952381,421.875,1250.0,10.5,250.0
0,1,Cov4,0,783.125,92.079715,,,,,,,140.929662,141.891014,...,93.75,0.653374,718.75,843.75,56.0,78.666667,640.625,1078.125,5.0,93.75
0,2,Cov4,0,767.248377,132.329919,,,,,,,179.078631,180.19427,...,171.875,0.738949,656.25,859.375,68.831169,87.012987,515.625,1093.75,12.833333,171.875
0,3,Cov4,0,760.416667,111.418943,,,,,,,144.70322,145.637782,...,93.75,0.769982,687.5,821.875,58.974359,82.051282,406.25,1250.0,11.142857,296.875
0,4,Cov4,0,762.419872,109.225866,,,,,,,146.856434,147.810405,...,121.09375,0.743759,687.5,837.5,52.564103,71.794872,609.375,1265.625,7.090909,78.125
0,5,Cov4,0,745.907738,77.555817,,,,,,,94.94914,96.051216,...,105.46875,0.816814,671.875,796.875,33.333333,52.380952,593.75,984.375,5.25,78.125
1,0,Cov4,0,773.133117,149.947102,,,,,,,188.37174,189.578212,...,218.75,0.796017,656.25,903.125,77.922078,89.61039,406.25,1093.75,15.4,343.75
1,1,Cov4,0,765.625,97.182238,,,,,,,129.716725,130.57864,...,93.75,0.749188,706.25,828.125,61.038961,79.220779,406.25,984.375,7.7,312.5
1,2,Cov4,0,739.648438,110.147693,,,,,,,142.729939,143.633134,...,144.53125,0.771721,656.25,828.125,57.5,75.0,531.25,1078.125,10.0,218.75
1,3,Cov4,0,820.3125,107.103552,,,,,,,166.932373,167.951549,...,113.28125,0.641598,734.375,906.25,62.5,76.388889,656.25,1187.5,10.285714,93.75




In [None]:
# HRV features per 30s window for Cov4.
# NOTE(review): cov4b has not been passed through exclude_blocks, so windows
# flagged as invalid are still part of this output.
features_by_windows(cov4b, "window_num_30")

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,HRV_MeanNN,HRV_SDNN,HRV_SDANN1,HRV_SDNNI1,HRV_SDANN2,HRV_SDNNI2,HRV_SDANN5,HRV_SDNNI5,HRV_RMSSD,HRV_SDSD,...,HRV_IQRNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_Prc80NN,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN
Block,window_num_30,SubjID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0,0,Cov4,0,696.800595,117.336517,,,,,,,174.829103,176.906333,...,121.09375,0.671150,625.000,793.750,66.666667,78.571429,421.875,953.125,10.500000,203.125
0,1,Cov4,0,725.609756,126.362148,,,,,,,158.017348,159.359582,...,109.37500,0.799673,656.250,781.250,63.414634,80.487805,421.875,1250.000,6.833333,328.125
0,2,Cov4,0,801.649306,111.729719,,,,,,,183.419132,186.030216,...,125.00000,0.609150,718.750,875.000,66.666667,80.555556,640.625,1078.125,5.142857,93.750
0,3,Cov4,0,764.802632,66.683010,,,,,,,84.417219,85.427309,...,58.59375,0.789922,718.750,796.875,42.105263,73.684211,656.250,968.750,4.750000,78.125
0,4,Cov4,0,711.509146,106.725588,,,,,,,129.036393,130.641906,...,109.37500,0.827097,625.000,765.625,60.975610,85.365854,515.625,1031.250,8.200000,171.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,4,Cov4,0,800.781250,118.136042,,,,,,,174.992028,177.525990,...,82.03125,0.675094,718.750,843.750,55.555556,69.444444,640.625,1125.000,6.000000,171.875
6,5,Cov4,0,740.384615,155.987254,,,,,,,171.425849,173.726970,...,195.31250,0.909940,634.375,856.250,76.923077,82.051282,468.750,1078.125,7.800000,171.875
6,6,Cov4,0,809.895833,121.404771,,,,,,,164.619590,166.948592,...,132.81250,0.737487,734.375,906.250,72.222222,83.333333,500.000,1062.500,9.000000,328.125
6,7,Cov4,0,736.718750,129.889835,,,,,,,147.681738,149.603491,...,175.78125,0.879525,640.625,834.375,62.500000,80.000000,531.250,1031.250,6.666667,171.875




In [None]:
# HRV features per 10s window for Cov4.
# NOTE(review): cov4b has not been passed through exclude_blocks, so windows
# flagged as invalid are still part of this output.
features_by_windows(cov4b, "window_num_10")

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,HRV_MeanNN,HRV_SDNN,HRV_SDANN1,HRV_SDNNI1,HRV_SDANN2,HRV_SDNNI2,HRV_SDANN5,HRV_SDNNI5,HRV_RMSSD,HRV_SDSD,...,HRV_IQRNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_Prc80NN,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN
Block,window_num_10,SubjID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0,0,Cov4,0,713.942308,96.400175,,,,,,,127.418019,133.055982,...,62.50000,0.756566,687.500,778.125,46.153846,61.538462,468.750,875.000,4.333333,234.375
0,1,Cov4,0,670.758929,130.507326,,,,,,,201.080689,208.838530,...,132.81250,0.649030,625.000,778.125,57.142857,78.571429,421.875,890.625,4.666667,203.125
0,2,Cov4,0,721.153846,126.913440,,,,,,,196.610272,201.258911,...,140.62500,0.645508,631.250,809.375,84.615385,84.615385,484.375,953.125,6.500000,187.500
0,3,Cov4,0,728.365385,63.960304,,,,,,,84.505108,87.747806,...,109.37500,0.756881,671.875,781.250,46.153846,76.923077,625.000,828.125,4.333333,156.250
0,4,Cov4,0,686.383929,71.855214,,,,,,,80.842172,83.807744,...,101.56250,0.888833,640.625,750.000,57.142857,64.285714,515.625,781.250,7.000000,125.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,20,Cov4,0,761.718750,169.223478,,,,,,,213.044506,223.040314,...,175.78125,0.794310,650.000,903.125,75.000000,83.333333,500.000,1062.500,6.000000,187.500
6,21,Cov4,0,751.302083,127.888906,,,,,,,142.272546,148.739973,...,97.65625,0.898901,665.625,840.625,58.333333,66.666667,578.125,1031.250,3.000000,125.000
6,22,Cov4,0,722.355769,128.396854,,,,,,,148.300375,154.170586,...,156.25000,0.865789,643.750,828.125,53.846154,76.923077,531.250,1000.000,4.333333,140.625
6,23,Cov4,0,752.403846,144.033653,,,,,,,117.966163,123.181613,...,187.50000,1.220974,640.625,884.375,61.538462,84.615385,609.375,1031.250,6.500000,0.000


