### Configuration

In [1]:
import os
import numpy as np
import pandas as pd

from scipy.stats import zscore

from utils__helpers_macro import ied_event_rejection, epoch_rejection
from utils__helpers_macro import mark_isolation, robust_zscore
import utils__config

In [2]:
# Switch to the project directory configured in utils__config so that the
# relative Cache/ and Data/ paths used in the cells below resolve from there.
os.chdir(utils__config.working_directory)
# Echo the resulting working directory as the cell output (sanity check).
os.getcwd()

'g:\\My Drive\\Residency\\Research\\Lab - Damisah\\Project - Sleep'

### Parameters

In [3]:
# Recording to process. Every input/output path below is derived from these
# two values, so switching recordings only requires editing this block
# (instead of commenting/uncommenting a full set of paths).
#
#   Subject 01                  : subject_num = 1, session = None
#   Subject 02, first recording : subject_num = 2, session = 'Apr26'
#   Subject 02, second recording: subject_num = 2, session = 'Apr27'
subject_num = 1
session = None  # optional sub-folder inside the subject's Cache directory

subject_code = f'S{subject_num:02d}'        # e.g. 'S01'
subject_dir = f'Subject{subject_num:02d}'   # e.g. 'Subject01'

# Cache files live in Cache/<subject>[/<session>]/, while the dictionary
# lives in Data/<subject>/ (no session sub-folder). Paths are built with '/'
# explicitly so the resulting strings do not depend on the operating system.
cache_prefix = '/'.join(
    part for part in ('Cache', subject_dir, session, subject_code) if part
)
data_prefix = f'Data/{subject_dir}/{subject_code}'

# Inputs
potato_path = f'{cache_prefix}_potatogram.csv'
dictionary = f'{data_prefix}_dictionary.xlsx'
ied_path = f'{cache_prefix}_IED.csv'
epoch_path = f'{cache_prefix}_bad_epochs.csv'
raw_sw_path = f'{cache_prefix}_SW_raw.csv'
raw_kc_path = f'{cache_prefix}_KC_raw.csv'
raw_sp_path = f'{cache_prefix}_SP_raw.csv'

# Outputs
er_path = f'{cache_prefix}_event_rates.csv'
clean_sw_path = f'{cache_prefix}_SW.csv'
clean_sp_path = f'{cache_prefix}_SP.csv'
clean_kc_path = f'{cache_prefix}_KC.csv'

In [5]:
# Minimum number of events per minute of SWS
# A channel whose per-channel rate is below the threshold for an event type
# has its events of that type removed further down in the notebook.
# Threshold for slow waves:
sw_threshold = 3
# Threshold for K-complexes:
kc_threshold = 0.25
# Threshold for spindles:
sp_threshold = 0.25

# Samples per second of the hypnogram; used below to convert the number of
# SWS samples into minutes.
sampling_freq = 256

### Remove Events near IEDs

In [6]:
# Read the five input tables in one pass: IEDs, rejected epochs, and the raw
# slow-wave / K-complex / spindle detections.
ieds, bad_epochs, slow_waves, k_complexes, spindles = (
    pd.read_csv(csv_path)
    for csv_path in (ied_path, epoch_path, raw_sw_path, raw_kc_path, raw_sp_path)
)

In [7]:
# Drop slow waves that fall within +/- 1 second of an IED.
# The helper's return value is matched against the 'ID' column below.
rejected_sw = ied_event_rejection(
    event_data=slow_waves,
    ied_data=ieds,
    rej_int=1,  # rejection interval in seconds
)

slow_waves = slow_waves.loc[~slow_waves['ID'].isin(rejected_sw)]
print('Total number of slow-waves: ', slow_waves.shape[0])

Number of rejected events:  11
Total number of slow-waves:  23586


In [8]:
# Drop K-complexes that fall within +/- 1 second of an IED.
# The helper's return value is matched against the 'ID' column below.
rejected_kc = ied_event_rejection(
    event_data=k_complexes,
    ied_data=ieds,
    rej_int=1,  # rejection interval in seconds
)

k_complexes = k_complexes.loc[~k_complexes['ID'].isin(rejected_kc)]
print('Total number of k-complexes: ', k_complexes.shape[0])

Number of rejected events:  0
Total number of k-complexes:  1692


In [9]:
# Drop spindles that fall within +/- 1 second of an IED.
# The helper's return value is matched against the 'ID' column below.
rejected_sp = ied_event_rejection(
    event_data=spindles,
    ied_data=ieds,
    rej_int=1,  # rejection interval in seconds
)

spindles = spindles.loc[~spindles['ID'].isin(rejected_sp)]
print('Total number of spindles: ', spindles.shape[0])

Number of rejected events:  3
Total number of spindles:  7066


### Remove events intersecting with rejected epochs

In [10]:
# Drop slow waves that intersect a rejected epoch; the helper's return value
# is matched against the 'ID' column.
rejected_sw = epoch_rejection(event_data=slow_waves, epoch_data=bad_epochs)

slow_waves = slow_waves.loc[~slow_waves['ID'].isin(rejected_sw)]
print('Total number of slow-waves: ', slow_waves.shape[0])

Number of rejected events:  339
Total number of slow-waves:  23247


In [11]:
# Drop K-complexes that intersect a rejected epoch; the helper's return value
# is matched against the 'ID' column.
rejected_kc = epoch_rejection(event_data=k_complexes, epoch_data=bad_epochs)

k_complexes = k_complexes.loc[~k_complexes['ID'].isin(rejected_kc)]
print('Total number of k-complexes: ', k_complexes.shape[0])

Number of rejected events:  43
Total number of k-complexes:  1649


In [12]:
# Drop spindles that intersect a rejected epoch; the helper's return value
# is matched against the 'ID' column.
rejected_sp = epoch_rejection(event_data=spindles, epoch_data=bad_epochs)

spindles = spindles.loc[~spindles['ID'].isin(rejected_sp)]
print('Total number of spindles: ', spindles.shape[0])

Number of rejected events:  108
Total number of spindles:  6958


### Apply additional criteria for K-Complexes

Mark a candidate event as a K-Complex if:
1. It is isolated from other SWs by +/- 1.5 seconds
2. Its negative peak is asymmetrically prominent
    - Negative-to-positive peak ratio (|ValNegPeak| / ValPosPeak) > 3
    - (or < 0.66 if accepting either a positive or a negative prominent peak)
3. It has a sharp slope (> 0.5 robust SD compared to other SWs) - DEPRECATED (still computed below, but not applied in the final filter)

Mark isolated events

In [13]:
# Identify isolated K-complex candidates using a +/- 1.5 second interval.
# The helper's return value is matched against the 'ID' column below.
isolated_events = mark_isolation(event_data = k_complexes, rej_int = 1.5)

# k_complexes is the result of boolean-mask filtering in earlier cells, so
# writing a column into it in place can raise SettingWithCopyWarning.
# assign() returns a new frame instead; isin() yields the same boolean flag
# as "default False, then True for the listed IDs".
k_complexes = k_complexes.assign(isolated = k_complexes['ID'].isin(isolated_events))

Number of isolated events:  1454


Mark events with large peak-to-trough ratios

In [14]:
# p2t_ratio: magnitude of the negative peak divided by the positive peak value.
# p2t: True when that ratio exceeds 3 (rows with a NaN ratio compare as False).
# Alternative two-sided criterion (disabled): p2t_ratio > 2 or p2t_ratio < 0.50.
#
# assign() returns a new frame rather than writing columns into a frame that
# came out of boolean-mask filtering (which can raise SettingWithCopyWarning).
k_complexes = k_complexes.assign(
    p2t_ratio = k_complexes['ValNegPeak'].abs() / k_complexes['ValPosPeak'],
    p2t = lambda kc: kc['p2t_ratio'] > 3,
)

print('Distribution of p2t ratios:\n', k_complexes['p2t_ratio'].describe(), '\n')
print('Number of large P2T-ratio K-Complexes: ', int(k_complexes['p2t'].sum()))

Distribution of p2t ratios:
 count    1649.000000
mean        4.127466
std         4.434669
min         0.541837
25%         1.603966
50%         2.614959
75%         4.832712
max        46.926998
Name: p2t_ratio, dtype: float64 

Number of large P2T-ratio K-Complexes:  697


Mark events that have a relatively sharp slope

In [15]:
# slope_zscore: 'Slope' transformed with robust_zscore separately within each
# channel. sharp_slope: True when that score exceeds 0.5 (NaN compares False).
#
# assign() returns a new frame rather than writing columns into a frame that
# came out of boolean-mask filtering (which can raise SettingWithCopyWarning).
k_complexes = k_complexes.assign(
    slope_zscore = k_complexes.groupby('Channel')['Slope'].transform(robust_zscore),
    sharp_slope = lambda kc: kc['slope_zscore'] > 0.5,
)

print('Number of sharp slope K-Complexes: ', int(k_complexes['sharp_slope'].sum()))

Number of sharp slope K-Complexes:  559


Create the final KC dataframe

In [16]:
# Keep only candidates that are both isolated and flagged for a large p2t
# ratio. The sharp_slope flag is not part of the filter.
k_complexes = k_complexes.loc[k_complexes['isolated'] & k_complexes['p2t']]

print('Number of K-Complexes: ', k_complexes.shape[0])

Number of K-Complexes:  628


### Remove channels with few events per minute

In [17]:
# Load hypnogram to calculate total SWS time.
# Hypnogram entries with stage 2 or 3 are counted as SWS; the sample count is
# converted to minutes using sampling_freq (samples per second).
hypnogram = np.loadtxt(potato_path, delimiter = ',')
hypnogram = pd.DataFrame(hypnogram, columns = ['stage'])
sws_samples = len(hypnogram.loc[hypnogram['stage'].isin([2, 3]), 'stage'])
sws_minutes = (sws_samples / sampling_freq) / 60
print(np.round(sws_minutes, decimals = 1), 'minutes of SWS')


def events_per_minute(events, minutes, label):
    """Return unrounded per-channel event rates (events per minute).

    Parameters
    ----------
    events : pandas.DataFrame
        One row per event; must contain a 'Channel' column.
    minutes : float
        Duration used as the denominator of the rate.
    label : str
        Name given to the rate column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'Channel' and `label`, one row per channel present in `events`.
    """
    counts = events.groupby('Channel')['Channel'].count()
    return (counts / minutes).rename(label).reset_index()


# Unrounded rates: these are what the thresholds are compared against.
sw_rate = events_per_minute(slow_waves, sws_minutes, 'sw')
kc_rate = events_per_minute(k_complexes, sws_minutes, 'kc')
sp_rate = events_per_minute(spindles, sws_minutes, 'sp')

# Create event-per-minute summaries by channel (rounded to one decimal; used
# only for the exported table).
sw_sum = sw_rate.assign(sw = np.round(sw_rate['sw'], decimals = 1))
kc_sum = kc_rate.assign(kc = np.round(kc_rate['kc'], decimals = 1))
sp_sum = sp_rate.assign(sp = np.round(sp_rate['sp'], decimals = 1))

# Export events-per-minute (epm) summary by channel
event_sum = sw_sum.merge(kc_sum, on = 'Channel', how = 'outer')
event_sum = event_sum.merge(sp_sum, on = 'Channel', how = 'outer')
event_sum.to_csv(er_path, index = False)

# Remove channels with less than event threshold.
# The comparison uses the unrounded rates: comparing rounded values would keep
# a channel at e.g. 2.96 events/min (rounds to 3.0) despite sw_threshold = 3,
# and would drop a channel at exactly 0.25 events/min (rounds to 0.2).
sw_remove = sw_rate.loc[sw_rate['sw'] < sw_threshold, 'Channel']
slow_waves = slow_waves[~slow_waves['Channel'].isin(sw_remove)]

kc_remove = kc_rate.loc[kc_rate['kc'] < kc_threshold, 'Channel']
k_complexes = k_complexes[~k_complexes['Channel'].isin(kc_remove)]

sp_remove = sp_rate.loc[sp_rate['sp'] < sp_threshold, 'Channel']
spindles = spindles[~spindles['Channel'].isin(sp_remove)]

63.1 minutes of SWS


Export

In [18]:
# Write the cleaned event tables to their output CSVs (row index omitted),
# in the order slow waves, K-complexes, spindles.
for cleaned_events, out_path in (
    (slow_waves, clean_sw_path),
    (k_complexes, clean_kc_path),
    (spindles, clean_sp_path),
):
    cleaned_events.to_csv(out_path, index=False)