# pipeline-full-2s-segs-oa-behavior

This notebook covers the pipeline for the neural noise analysis on full recordings, done only on older adults. A matrix that contains both their behavioral measures and slopes is constructed and exported. The notebook is structured as follows:

1. Import all subjects
2. For each subject, extract all segments that are marked for analysis. 
3. For each segment extracted, extract as many 2-second segments with 50% overlap as we can.
4. Use these segments to compute the PSD through Welch's method.
5. Calculate the slope of each channel's PSD, and the slope of each subject's channel-averaged PSD.
6. Import the behavior table and add all slope results. Export to a CSV.

In [1]:
%matplotlib inline
import os
import glob
import seaborn
import numpy as np
import scipy as sp
import pandas as pd
import scipy.io
import numpy.fft
import scipy.signal
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn import linear_model
mpl.rcParams['figure.figsize'] = (16, 10)

In [5]:
def get_filelist(import_path):
    """
    Recursively collect the paths of all .mat files under import_path.

    The result is sorted so that the subject order is the same on every run:
    neither os.walk nor glob.glob guarantees an ordering, and later cells
    line subjects up with table rows purely by position.

    Returns a list of path strings (empty if nothing is found).
    """
    matfiles = []
    for root, _dirs, _files in os.walk(import_path):
        matfiles.extend(glob.glob(os.path.join(root, '*.mat')))
    return sorted(matfiles)

def import_subject(subj, i, import_path):
    """
    Load one subject's .mat file and store its contents in subj[i].

    The file must provide the variables 'name', 'srate', 'evts' and 'data'.
    subj[i] ends up with:
      'name'    subject name as a string
      'srate'   sampling rate as an int
      'events'  list of [field0, field1, field2] taken from each 'evts' entry
      'data'    the squeezed 'data' array
      'nbchan'  len() of that array

    Returns the same subj dict that was passed in.
    """
    entry = subj[i] = {}
    mat = sp.io.loadmat(import_path)
    entry['name'] = str(np.squeeze(mat['name']))
    entry['srate'] = int(np.squeeze(mat['srate']))
    # loadmat wraps every struct field in nested arrays; peel them off.
    entry['events'] = [
        [evt[0][0], evt[1][0][0], evt[2][0][0]]
        for evt in np.squeeze(mat['evts'])
    ]
    recording = np.squeeze(mat['data'])
    entry['data'] = recording
    entry['nbchan'] = len(recording)
    return subj

def _print_window_info(events, port_code):
    """
    Print how much analysable data each port_code event contains.

    events is a list whose entries start with [type, latency, ...]. An event
    is taken to end at the latency of the entry that follows it. For every
    matching event at least 1024 points long, one line is printed with its
    point count, whole seconds (points // 512) and window count
    (points // 512 - 1); a final line prints the total window count.

    The constants 512 and 1024 are hard-coded; they presumably correspond to
    a 512 Hz recording and 2-second windows -- confirm before reusing on
    data with another sampling rate.
    """
    # Stop one short of the end: the last entry has no successor to mark
    # where it ends, and looking up events[i+1] for it raised IndexError.
    spans = [[events[i][1], events[i+1][1]]
             for i in range(len(events) - 1) if events[i][0] == port_code]
    total_wins = 0
    for span in spans:
        pts = span[1] - span[0]
        if pts >= 1024:
            secs = pts // 512
            nwin = pts // 512 - 1
            total_wins += nwin
            print('Event {}:\t{} points, {} seconds, {} windows'.format(span, pts, secs, nwin))
    print('Total windows able to be extracted: ', total_wins)

def get_windows(data, events, port_code, nperwindow=512*2, noverlap=512):
    """
    Cut overlapping fixed-length windows out of every port_code event.

    Parameters
      data        sliceable 1-D sequence of samples
      events      list whose entries start with [type, latency, ...]; an
                  event ends at the latency of the entry that follows it
      port_code   event type to extract
      nperwindow  samples per window
      noverlap    samples shared by consecutive windows

    Returns a list of data slices, each nperwindow long. Events shorter than
    nperwindow contribute nothing, and so does a matching event that is the
    very last entry (there is no following entry to say where it ends).

    Raises ValueError if noverlap is not smaller than nperwindow.
    """
    # Consecutive windows start this many samples apart. With the defaults
    # (1024 / 512) this equals noverlap, which is what the code used to step
    # by; for any other combination stepping by noverlap was wrong.
    step = nperwindow - noverlap
    if step <= 0:
        raise ValueError('noverlap must be smaller than nperwindow')

    windows = []
    # range(len(events) - 1): the last entry has no successor, so indexing
    # events[i+1] for it used to raise IndexError.
    for i in range(len(events) - 1):
        if events[i][0] != port_code:
            continue
        start, end = events[i][1], events[i+1][1]
        if end - start < nperwindow:
            continue
        # Number of whole windows that fit without running past the event.
        nwindows = (end - start - nperwindow) // step + 1
        for k in range(nwindows):
            first = start + step * k
            windows.append(data[first : first + nperwindow])
    return windows

def welch(windows, srate):
    """
    Average the PSDs of a list of equally long data segments.

    Each segment is passed to scipy.signal.welch with nperseg equal to the
    segment length and a Hamming window; the resulting spectra are averaged
    element-wise and the mean spectrum is returned.
    """
    spectra = []
    for segment in windows:
        _freqs, pxx = sp.signal.welch(segment, srate, nperseg=len(segment), window='hamming')
        spectra.append(pxx)
    return np.mean(spectra, axis=0)

def linreg_slope(f, psd, lofreq, hifreq):
    """
    Fits line to log10 of the PSD, using regular linear regression.

    Parameters
      f       frequency column vector (shape (N, 1))
      psd     PSD values, indexed like f
      lofreq  lower edge of the fit band
      hifreq  upper edge of the fit band (exclusive)

    The band is selected by array position, lofreq*2 : hifreq*2, the same
    way ransac_slope does it; that matches Hz only on a 0.5 Hz grid such as
    the np.linspace(0, 256, 513) axis built in compute_subject_psds.

    Returns slope (regression coefficient times 100) and fit line (the
    model's prediction over all of f).
    """
    # Honour the requested band; this used to be hard-coded to 2*2:24*2,
    # silently ignoring lofreq and hifreq.
    band = slice(lofreq*2, hifreq*2)
    model = linear_model.LinearRegression()
    model.fit(f[band], np.log10(psd[band]))
    fit_line = model.predict(f)
    return model.coef_[0] * (10**2), fit_line

def ransac_slope(f, psd, lofreq, hifreq):
    """
    Robust line fit to log10 of the PSD via RANSAC around a plain linear
    regression.

    Only the array positions lofreq*2 up to (not including) hifreq*2 are
    used for fitting. Returns the slope (inlier model's coefficient times
    100) and the fit line predicted over all of f.
    """
    first, last = lofreq*2, hifreq*2
    base_model = linear_model.LinearRegression()
    robust = linear_model.RANSACRegressor(base_model)
    robust.fit(f[first:last], np.log10(psd[first:last]))
    fit_line = robust.predict(f)
    slope = robust.estimator_.coef_[0] * (10**2)
    return slope, fit_line

def remove_freq_buffer(data, lofreq, hifreq):
    """
    Drop a band of entries from a PSD or frequency vector.

    The input is flattened, the positions lofreq*2 up to (not including)
    hifreq*2 are deleted, and what remains is returned as a column vector
    of shape (n_remaining, 1).
    """
    start, stop = lofreq*2, hifreq*2
    kept = np.delete(data, range(start, stop))
    return kept.reshape(-1, 1)

def compute_subject_psds(import_path):
    """
    Import all subjects and compute per-channel as well as average PSDs.

    Every .mat file found under import_path is loaded with import_subject.
    For each channel, the 'C1' (eyes closed) and 'O1' (eyes open) events are
    cut into windows and turned into a PSD with welch.

    Returns a dict with:
      'nbsubj'                  number of subject files processed
      'f'                       frequency column vector, linspace(0, 256, 513)
      'f_rm_alpha'              'f' with positions 14..27 (7-13.5 Hz) removed
      'eyesC_psd', 'eyesO_psd'  PSDs averaged over all subjects
      0 .. nbsubj-1             one dict per subject holding its metadata,
                                its channel-averaged PSDs (with and without
                                the removed band) and, under each channel
                                index, that channel's PSDs
    """
    matfiles = get_filelist(import_path)

    # Temporary -- I believe these aren't included in the samples-features matrix
    if import_path == '../data/pipeline-full/oaExclFiltCARClust-mat/':
        excluded = {
            import_path + subj_id + '.mat'
            for subj_id in ('120127132', '120127133', '120127134', '120127140',
                            '120127154', '120127159', '120127160', '120127167')
        }
        # Filter instead of list.remove(): remove() raises ValueError as soon
        # as one of these files happens not to be on disk.
        matfiles = [path for path in matfiles if path not in excluded]

    subj = {}
    subj['nbsubj'] = len(matfiles)
    subj['f'] = np.linspace(0, 256, 513)
    subj['f'] = subj['f'].reshape(len(subj['f']), 1)
    subj['f_rm_alpha'] = remove_freq_buffer(subj['f'], 7, 14)
    for i, matfile in enumerate(matfiles):
        subj = import_subject(subj, i, matfile)
        for ch in range(subj[i]['nbchan']):
            subj[i][ch] = {}
            eyesC_windows = get_windows(subj[i]['data'][ch], subj[i]['events'], 'C1')
            eyesO_windows = get_windows(subj[i]['data'][ch], subj[i]['events'], 'O1')
            # NOTE: the sampling rate is hard-coded to 512 here rather than
            # read from subj[i]['srate'].
            subj[i][ch]['eyesC_psd'] = welch(eyesC_windows, 512)
            subj[i][ch]['eyesO_psd'] = welch(eyesO_windows, 512)
            subj[i][ch]['eyesC_psd_rm_alpha'] = remove_freq_buffer(subj[i][ch]['eyesC_psd'], 7, 14)
            subj[i][ch]['eyesO_psd_rm_alpha'] = remove_freq_buffer(subj[i][ch]['eyesO_psd'], 7, 14)
        subj[i]['data'] = np.nan # No longer needed, so clear it from memory
        # Subject-level PSD: mean over that subject's channels.
        subj[i]['eyesC_psd'] = np.mean([subj[i][ch]['eyesC_psd'] for ch in range(subj[i]['nbchan'])], axis=0)
        subj[i]['eyesO_psd'] = np.mean([subj[i][ch]['eyesO_psd'] for ch in range(subj[i]['nbchan'])], axis=0)
        subj[i]['eyesC_psd_rm_alpha'] = remove_freq_buffer(subj[i]['eyesC_psd'], 7, 14)
        subj[i]['eyesO_psd_rm_alpha'] = remove_freq_buffer(subj[i]['eyesO_psd'], 7, 14)
        print("Processed: ", subj[i]['name'])
    # Grand average: mean over the subject-level PSDs.
    subj['eyesC_psd'] = np.mean([subj[i]['eyesC_psd'] for i in range(subj['nbsubj'])], axis=0)
    subj['eyesO_psd'] = np.mean([subj[i]['eyesO_psd'] for i in range(subj['nbsubj'])], axis=0)
    return subj

def fit_slopes(subj, regr_func, lofreq, hifreq):
    """
    Fit PSD slopes at three levels and store them in subj.

    Parameters
      subj       dict as produced by compute_subject_psds
      regr_func  callable (f, psd, lofreq, hifreq) -> (slope, fit_line),
                 e.g. linreg_slope or ransac_slope
      lofreq, hifreq  fit band, passed through to regr_func

    Adds 'eyesC_slope', 'eyesC_fitline', 'eyesO_slope' and 'eyesO_fitline'
    to subj itself (grand-average PSD), to every subj[i] (subject-average
    PSD) and to every subj[i][ch] (per-channel, alpha-removed PSD).

    Returns the same subj dict.
    """
    f = subj['f']
    # The per-channel fits use the alpha-removed PSDs, so they must be paired
    # with the frequency axis that had the same rows removed. Passing the
    # full axis assigned the wrong frequency to every PSD value above the
    # removed band. As a consequence the per-channel fit lines are evaluated
    # on this reduced axis as well.
    f_rm_alpha = subj['f_rm_alpha']

    # Fitting on the grand average PSD of all subjects
    subj['eyesC_slope'], subj['eyesC_fitline'] = regr_func(f, subj['eyesC_psd'], lofreq, hifreq)
    subj['eyesO_slope'], subj['eyesO_fitline'] = regr_func(f, subj['eyesO_psd'], lofreq, hifreq)
    for i in range(subj['nbsubj']):
        # Per-subject PSD average fitting
        subj[i]['eyesC_slope'], subj[i]['eyesC_fitline'] = regr_func(f, subj[i]['eyesC_psd'], lofreq, hifreq)
        subj[i]['eyesO_slope'], subj[i]['eyesO_fitline'] = regr_func(f, subj[i]['eyesO_psd'], lofreq, hifreq)
        for ch in range(subj[i]['nbchan']):
            # Per-channel PSD fitting
            chan = subj[i][ch]
            chan['eyesC_slope'], chan['eyesC_fitline'] = regr_func(
                f_rm_alpha, chan['eyesC_psd_rm_alpha'], lofreq, hifreq)
            chan['eyesO_slope'], chan['eyesO_fitline'] = regr_func(
                f_rm_alpha, chan['eyesO_psd_rm_alpha'], lofreq, hifreq)
    return subj

Now we import each subject, compute their PSDs for both the eyes-closed and eyes-open segments, average the PSDs across channels and across subjects, and fit a line to each PSD to estimate its slope.

# Importing subjects and computing the PSD

In [3]:
# Older adults: compute the PSDs, cache them on disk, then drop the
# reference to the in-memory result.
subjoa = compute_subject_psds('../data/pipeline-full/oaExclFiltCARClust-mat/')
np.save('../data/pipeline-full/subjoa-no-fitting.npy', subjoa)
subjoa = []

# Younger adults: same three steps.
subjya = compute_subject_psds('../data/pipeline-full/yaExclFiltCARClust-mat/')
np.save('../data/pipeline-full/subjya-no-fitting.npy', subjya)
subjya = []

Processed:  120127101
Processed:  120127102
Processed:  120127103
Processed:  120127104
Processed:  120127105
Processed:  120127106
Processed:  120127107
Processed:  120127108
Processed:  120127109
Processed:  120127110
Processed:  120127111
Processed:  120127112
Processed:  120127113
Processed:  120127114
Processed:  120127115
Processed:  120127116
Processed:  120127117
Processed:  120127118
Processed:  120127119
Processed:  120127120
Processed:  120127121
Processed:  120127122
Processed:  120127123
Processed:  120127124
Processed:  120127125
Processed:  120127128
Processed:  120127130
Processed:  120127131
Processed:  120127135
Processed:  120127137
Processed:  120127138
Processed:  120127139
Processed:  120127142
Processed:  120127144
Processed:  120127145
Processed:  120127146
Processed:  120127147
Processed:  120127148
Processed:  120127149
Processed:  120127151
Processed:  120127153
Processed:  120127155
Processed:  120127156
Processed:  120127157
Processed:  120127158
Processed:

# Fit to PSD slopes and save results

In [6]:
# The cached files hold a Python dict that np.save pickled into a 0-d object
# array. NumPy >= 1.16.3 refuses to unpickle on load unless allow_pickle=True
# is passed explicitly. Only do this for files this notebook wrote itself:
# unpickling data from an untrusted source can execute arbitrary code.
subjoa = np.load('../data/pipeline-full/subjoa-no-fitting.npy', allow_pickle=True).item()
subjya = np.load('../data/pipeline-full/subjya-no-fitting.npy', allow_pickle=True).item()
# Fit slopes with ordinary linear regression, passing 2 and 24 as the band.
subjoa = fit_slopes(subjoa, linreg_slope, 2, 24)
subjya = fit_slopes(subjya, linreg_slope, 2, 24)

In [17]:
# The per-channel PSDs are column vectors, so the regression returns each
# per-channel slope wrapped in a one-element array; unwrap it to a scalar.
# np.ravel(...)[0] also accepts a value that is already a scalar, so the cell
# can be re-run without failing (plain [0] on a scalar raises).
for group in (subjoa, subjya):
    for i in range(group['nbsubj']):
        for ch in range(group[i]['nbchan']):
            for key in ('eyesC_slope', 'eyesO_slope'):
                group[i][ch][key] = np.ravel(group[i][ch][key])[0]

In [18]:
# Persist both fitted result dicts.
for out_path, fitted in (
        ('../data/pipeline-full/subjoa-2-24fit.npy', subjoa),
        ('../data/pipeline-full/subjya-2-24fit.npy', subjya)):
    np.save(out_path, fitted)

# Stats

First, let's construct a samples-features matrix so we can send it to Bob and perform statistics on results.

Average of:
- A29, A30, A31, B23, B24, B26

In [9]:
# Subject ID strings for the two groups, kept in their original order.
older = """
120127142 120127146 120127130 120127103 120127123
120127131 120127121 120127102 120127166 120127164
120127165 120127170 120127112 120127116 120127161
120127158 120127168 120127162 120127149 120127169
""".split()

younger = """
112118351 112118463 112118526 112118578 112118642
1121181183 1121181262 1121181494 112118475 112118553
112118761 112118785 112118468 112118470 1121181424
112118131 112118257 112118373 112118416 112118479
1121181181 1121181393 1121181510 112118508 112118723
1121181517
""".split()

In [20]:
# Load the existing older-adult table; judging by its file name it already
# holds the behavioral measures plus slopes from an earlier 20-second,
# eyes-closed analysis.
df = pd.read_csv('../data/oa-behavior-and-slopes-20s-eyesc.csv')
# Last expression of the cell: shows the first rows as the cell output.
df.head()

Unnamed: 0,SUBJECT,CLASS,AGE,SEX,RAVLT_DEL,EDUCATION,SEX_NUMBERIC,ANIMALS,BNT_30,LM_1,...,B26,B27,B28,B29,B30,FRONTAL,LTEMPORAL,CENTRAL,RTEMPORAL,OCCIPITAL
0,127101,SA_Control,80.84873,2,3,15,2,18,27,10,...,-4.748383,-5.950079,-6.606615,-5.917265,-4.76672,-6.458774,-6.633731,-7.051571,-6.169163,-5.433811
1,127102,MCI_Control,71.8987,1,8,16,1,22,30,15,...,-4.621796,-5.606016,-6.762557,-5.917265,-4.28551,-6.458774,-6.737865,-6.479462,-6.818404,-5.433811
2,127103,MCI_Control,69.43463,2,11,14,2,29,29,7,...,-0.981276,-2.358701,-3.011193,-2.159953,-1.578591,-0.696107,-4.134891,-4.065552,-3.002314,0.712028
3,127104,SA_Control,83.61396,2,8,14,2,22,27,16,...,-5.393366,-4.7297,-4.50791,-4.547847,-5.036724,-3.482842,-4.028538,-6.004186,-7.831297,-4.690516
4,127105,SA_Control,80.69541,2,7,18,2,17,28,20,...,-5.109961,-5.684171,-5.320151,-5.582867,-6.15003,-4.585814,-5.216128,-5.562051,-5.251369,-6.189279


In [22]:
channels = ["A1","A2","A3","A4","A5","A6","A7","A8","A10","A11","A12","A13","A14","A15","A16","A17","A18","A21","A22","A23","A24","A25","A26","A27","A29","A30","A31","B1","B2","B3","B4","B5","B6","B8","B9","B10","B11","B12","B13","B14","B17","B18","B19","B20","B21","B22","B23","B24","B26","B27","B28","B29","B30","FRONTAL","LTEMPORAL","CENTRAL","RTEMPORAL","OCCIPITAL"]
# One value per subject: the slope fitted to that subject's channel-averaged
# PSD. This used to read subjoa['eyesC_slope'] (no [i]), which is the single
# grand-average slope, so every row received the same number.
# NOTE(review): assumes the rows of df are in the same order as the subjects
# in subjoa, and that df has exactly subjoa['nbsubj'] rows -- confirm.
df['AVG_PSD_EYESC'] = [subjoa[i]['eyesC_slope'] for i in range(subjoa['nbsubj'])]
df['AVG_PSD_EYESO'] = [subjoa[i]['eyesO_slope'] for i in range(subjoa['nbsubj'])]
# NOTE(review): assumes channel index ch in the subject data corresponds to
# channels[ch], including the five region names at the end -- confirm.
for ch, label in enumerate(channels):
    df[label + '_EYESC'] = [subjoa[i][ch]['eyesC_slope'] for i in range(subjoa['nbsubj'])]
    df[label + '_EYESO'] = [subjoa[i][ch]['eyesO_slope'] for i in range(subjoa['nbsubj'])]

In [28]:
# Tag the slope columns that were already in the loaded table with a
# '_20S_EYESC' suffix so they cannot be confused with the new columns.
# The old column names are 'AVG_PSD_SLOPE' plus every entry of `channels`.
cols_20s = {'AVG_PSD_SLOPE': 'AVG_PSD_SLOPE_20S_EYESC'}
for old_name in channels:
    cols_20s[old_name] = old_name + '_20S_EYESC'
df.rename(columns=cols_20s, inplace=True)

In [30]:
# Todo: Add columns that says how many segments were able to be extracted from the recording
# Show every column name as a plain list.
print(df.columns.tolist())

['SUBJECT', 'CLASS', 'AGE', 'SEX', 'RAVLT_DEL', 'EDUCATION', 'SEX_NUMBERIC', 'ANIMALS', 'BNT_30', 'LM_1', 'LM_2', 'MMSE', 'TMT_A', 'TMT_B', 'DSPAN_B', 'DSPAN_F', 'DSYMBOL', 'LEARNER', 'RB1_ACC', 'RB1_RT', 'RB1_ACC_F', 'RB1_F_RT', 'RB1_N_ACC', 'RB1_N_RT', 'RB2_ACC', 'RB2_RT', 'RB2_ACC_F', 'RB2_F_RT', 'RB2_N_ACC', 'RB2_N_RT', 'RB_CHECK_F_ACC', 'RB_CHECK_N_ACC', 'RB_CHECK_TOTAL_ACC', 'GNG_GO_HR', 'GNG_GO_MISS', 'GNG_NOGO_CR', 'GNG_NOGO_FA', 'GNG_GO_RT', 'GNG_NOGOINCORR_RT', 'AVG_PSD_SLOPE_20S_EYESC', 'A1_20S_EYESC', 'A2_20S_EYESC', 'A3_20S_EYESC', 'A4_20S_EYESC', 'A5_20S_EYESC', 'A6_20S_EYESC', 'A7_20S_EYESC', 'A8_20S_EYESC', 'A10_20S_EYESC', 'A11_20S_EYESC', 'A12_20S_EYESC', 'A13_20S_EYESC', 'A14_20S_EYESC', 'A15_20S_EYESC', 'A16_20S_EYESC', 'A17_20S_EYESC', 'A18_20S_EYESC', 'A21_20S_EYESC', 'A22_20S_EYESC', 'A23_20S_EYESC', 'A24_20S_EYESC', 'A25_20S_EYESC', 'A26_20S_EYESC', 'A27_20S_EYESC', 'A29_20S_EYESC', 'A30_20S_EYESC', 'A31_20S_EYESC', 'B1_20S_EYESC', 'B2_20S_EYESC', 'B3_20S_EYESC'

In [31]:
# to_csv has no index_col option (that one belongs to read_csv) and current
# pandas rejects it with a TypeError. index=False expresses the apparent
# intent: do not write the automatic row index as an extra first column.
df.to_csv('../data/pipeline-full/oa-behavior-slope-eyesc-eyeso-full.csv', index=False)

In [41]:
# Remove the older 20-second eyes-closed slope columns. They are selected by
# their '_20S_EYESC' suffix rather than by position (39:98): a positional
# drop removes a different set of columns if the cell is run a second time
# or if the column layout changes.
df.drop([col for col in df.columns if col.endswith('_20S_EYESC')], axis=1, inplace=True)

In [43]:
# Show the remaining column names as a plain list.
print(df.columns.tolist())

['SUBJECT', 'CLASS', 'AGE', 'SEX', 'RAVLT_DEL', 'EDUCATION', 'SEX_NUMBERIC', 'ANIMALS', 'BNT_30', 'LM_1', 'LM_2', 'MMSE', 'TMT_A', 'TMT_B', 'DSPAN_B', 'DSPAN_F', 'DSYMBOL', 'LEARNER', 'RB1_ACC', 'RB1_RT', 'RB1_ACC_F', 'RB1_F_RT', 'RB1_N_ACC', 'RB1_N_RT', 'RB2_ACC', 'RB2_RT', 'RB2_ACC_F', 'RB2_F_RT', 'RB2_N_ACC', 'RB2_N_RT', 'RB_CHECK_F_ACC', 'RB_CHECK_N_ACC', 'RB_CHECK_TOTAL_ACC', 'GNG_GO_HR', 'GNG_GO_MISS', 'GNG_NOGO_CR', 'GNG_NOGO_FA', 'GNG_GO_RT', 'GNG_NOGOINCORR_RT', 'AVG_PSD_EYESC', 'AVG_PSD_EYESO', 'A1_EYESC', 'A1_EYESO', 'A2_EYESC', 'A2_EYESO', 'A3_EYESC', 'A3_EYESO', 'A4_EYESC', 'A4_EYESO', 'A5_EYESC', 'A5_EYESO', 'A6_EYESC', 'A6_EYESO', 'A7_EYESC', 'A7_EYESO', 'A8_EYESC', 'A8_EYESO', 'A10_EYESC', 'A10_EYESO', 'A11_EYESC', 'A11_EYESO', 'A12_EYESC', 'A12_EYESO', 'A13_EYESC', 'A13_EYESO', 'A14_EYESC', 'A14_EYESO', 'A15_EYESC', 'A15_EYESO', 'A16_EYESC', 'A16_EYESO', 'A17_EYESC', 'A17_EYESO', 'A18_EYESC', 'A18_EYESO', 'A21_EYESC', 'A21_EYESO', 'A22_EYESC', 'A22_EYESO', 'A23_EYESC',

In [44]:
# Write the final table. index_col is a read_csv argument that to_csv does
# not accept (TypeError on current pandas); index=False keeps the automatic
# row index out of the file, which appears to be what was intended.
df.to_csv('../data/pipeline-full/oa-behavior-slope-eyesc-eyeso-full.csv', index=False)