# Data Preprocessing
##### Jackson Cornell and Max Rosenzweig
Each block needs to be run in order. For "Select sample start and end points" user input is needed, as explained before that block.

### Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.io

from scipy import signal
from scipy.fft import fftshift
from statsmodels.graphics.tsaplots import plot_acf

### Load Data
Change the path `..\data\ET003M12D12taskstim.mat` in the cell below so that it points to your MATLAB file containing the output of `demo.m`.

In [None]:
# read mat file
# Forward slashes are accepted on Windows as well as on POSIX systems, and
# match the '../data/...' paths used when the processed arrays are written.
data = scipy.io.loadmat('../data/ET003M12D12taskstim.mat')

# load lfp and acc to arrays
# loadmat wraps the MATLAB struct 'all' in nested arrays; the [0][0][0]
# chain unwraps them so the first axis indexes recordings (acc_data's first
# axis is used as the recording count below).
lfp_data = np.array(data['all']['LFP'][0][0][0])
acc_data = np.array(data['all']['acc'][0][0][0])

# parameters
recording_cnt = np.shape(acc_data)[0]  # number of recordings (trials) loaded
acc_ch_cnt = 3       # accelerometer channels processed per recording
lfp_ch_cnt_raw = 4   # raw LFP columns per recording (0..3)
lfp_ch_cnt = 2       # differential LFP channels: (col 1 - col 0), (col 3 - col 2)
fs_lfp = 250         # LFP sampling rate (presumably Hz -- confirm)
fs_acc = 518         # accelerometer sampling rate (presumably Hz -- confirm)

### Filter Design

In [None]:
# Bandpass filter design
# Gamma Band 30-80 Hz
#
# The 45-tap impulse response is mirror-symmetric about its centre tap, so
# only the first 22 taps are written out; the full coefficient list is the
# leading taps, the centre tap, and the leading taps again in reverse order.
_b_bp_leading = [
    0.0010585235683982575,
    -0.006016459652642086,
    -0.013713280775328133,
    0.008839143246220272,
    0.012355691459236085,
    0.0006565252315114365,
    0.01627617053350753,
    0.008535035961564507,
    -0.020139981607238684,
    -0.003045984771212982,
    -0.013463849140202367,
    -0.037881809346685634,
    0.014373176548413006,
    0.0207243789332884,
    -0.0014500788974829845,
    0.07189389025348955,
    0.04044800252470767,
    -0.050763539100460074,
    0.021399346281950896,
    -0.08920896743997705,
    -0.29210690983600424,
    0.08022287852869205,
]
_b_bp_centre = 0.469926378680085

# numerator taps (plain list of 45 floats)
b_bp = _b_bp_leading + [_b_bp_centre] + _b_bp_leading[::-1]
# denominator: a lone 1 means the filter has no feedback terms (FIR)
a_bp = [1]

In [None]:
# FIR filter design
# Numerator taps (391 coefficients) of the FIR filter that the
# "Apply filtering and second downsampling" cell applies to each resampled
# accelerometer channel through signal.filtfilt(b, a, ...).
# NOTE(review): the taps look mirror-symmetric about the centre value
# (0.01962179475977161), which would make this a linear-phase design -- confirm.
# TODO: record the design parameters (pass band, design tool); they cannot be
# recovered from this file.
b = [-0.00522976969654727,
     -0.000560433515183372,
     -0.0005879046685356189,
     -0.0006144098787837698,
     -0.0006398258232683617,
     -0.0006639488421795402,
     -0.000686701060494037,
     -0.0007078690575819224,
     -0.00072734046615764,
     -0.0007450124223546479,
     -0.0007607497282814199,
     -0.000774361286844492,
     -0.0007856896266741399,
     -0.0007945797718139921,
     -0.0008008960134012433,
     -0.0008045490536507538,
     -0.000805463471141381,
     -0.0008035202546359405,
     -0.0007985615438343808,
     -0.0007904512052608203,
     -0.0007790945975853025,
     -0.0007645079195236311,
     -0.0007466898038610989,
     -0.0007255005464266609,
     -0.0007006666256202271,
     -0.0006723205122973175,
     -0.0006406380978287092,
     -0.0006055809364516472,
     -0.00056641969033693,
     -0.0005245233867090431,
     -0.0004786983666658729,
     -0.00042963366025958297,
     -0.0003772605108028464,
     -0.0003217254816247227,
     -0.00026302308003315977,
     -0.00020137007261899283,
     -0.0001369112524187922,
     -0.0000697582046867073,
     -6.653947450723948e-8,
     0.0000719192694666781,
     0.00014599342539527496,
     0.0002219771203020964,
     0.00029967700833921564,
     0.000378859591784035,
     0.00045925514581032556,
     0.0005405913923955102,
     0.0006226279819153048,
     0.000705133391619707,
     0.0007877892564508873,
     0.000870277161388022,
     0.0009523062265110934,
     0.0010335812596497876,
     0.0011136535615135132,
     0.0011922622867316381,
     0.0012689533594025912,
     0.0013438495160740201,
     0.0014152538149068907,
     0.0014847808529905682,
     0.0015504341864223043,
     0.0016122740460248236,
     0.0016703844602498261,
     0.001724430461265395,
     0.0017737716914853546,
     0.0018181526152067746,
     0.0018574658246454238,
     0.0018915648182749381,
     0.0019201692891223678,
     0.0019429540381374617,
     0.0019595712367353837,
     0.001969728013003854,
     0.001973174755416274,
     0.001969649816541209,
     0.0019588793495544936,
     0.0019406314141150864,
     0.0019147598332715644,
     0.0018812440215329833,
     0.0018402895419123925,
     0.0017922303635599021,
     0.0017373750571295728,
     0.0016757442148791937,
     0.0016068876674971424,
     0.0015293377917096392,
     0.0014411535605955985,
     0.001342362316285495,
     0.0012525215938752991,
     0.0011409211983264451,
     0.0010277029216358565,
     0.0009074587212922,
     0.0007809730726638415,
     0.0006485398152999211,
     0.0005105933234803262,
     0.00036742639375998037,
     0.00021940158417361813,
     0.0000669548620298512,
     -0.00008942563762639262,
     -0.00024926544185230575,
     -0.0004120377757146344,
     -0.0005772089443642813,
     -0.0007442590091905181,
     -0.0009126354429600253,
     -0.0010817169459567918,
     -0.0012508436698857252,
     -0.0014193672760023062,
     -0.0015866631951609983,
     -0.001752154318434934,
     -0.0019151565029332264,
     -0.0020748646796718124,
     -0.002230489310025245,
     -0.002381540094072683,
     -0.002527287833663975,
     -0.00266690017435714,
     -0.0027993865980285577,
     -0.0029249547460977313,
     -0.003041697039306195,
     -0.0031497874675384518,
     -0.00324825236398359,
     -0.003336449486802089,
     -0.003413656818301452,
     -0.003479385699232179,
     -0.003532953533382869,
     -0.003573702686940722,
     -0.003601080940219207,
     -0.0036146038616615824,
     -0.003613721268072974,
     -0.003597892718574346,
     -0.003566632302888172,
     -0.003519525566908044,
     -0.00345621373796425,
     -0.0033763542168496075,
     -0.0032796081258571557,
     -0.003165691817130507,
     -0.0030344279733142783,
     -0.002885642872277675,
     -0.0027192001295324353,
     -0.0025350358230754695,
     -0.0023331829374547775,
     -0.00211352970399029,
     -0.0018761957153400509,
     -0.0016209640030349812,
     -0.0013492945649294067,
     -0.0010585067913991312,
     -0.0007524231429819083,
     -0.0004292970702961897,
     -0.0000892473562811366,
     0.00026671048293896185,
     0.0006376813242390007,
     0.0010233719189827117,
     0.0014234342677589221,
     0.0018373207087292727,
     0.0022642922016364252,
     0.0027035648624877343,
     0.0031543572306565655,
     0.003615966140281129,
     0.004087683851145636,
     0.004568697950419642,
     0.005058066423553038,
     0.005554741092409959,
     0.006057613317214209,
     0.0065656141229951245,
     0.007077836035071489,
     0.007593449769673476,
     0.008111607456162538,
     0.008631240834625018,
     0.009150952454600968,
     0.009668948623076993,
     0.010184084354358074,
     0.01069660143733805,
     0.011207367787045883,
     0.011708093823712695,
     0.012205096804603435,
     0.012693362997581694,
     0.013172878963222513,
     0.013642441780895263,
     0.01410106750832395,
     0.014547598785280282,
     0.01498101647815172,
     0.015400338031201282,
     0.015804588264146748,
     0.01619279264012354,
     0.016564054820840564,
     0.016917482329548313,
     0.01725219847146682,
     0.017567402022023854,
     0.01786238083860661,
     0.01813647125115341,
     0.01838902356524701,
     0.01861940141435486,
     0.018826999987647308,
     0.01901137598703197,
     0.019172162740592583,
     0.019308947017529412,
     0.01942123072843263,
     0.019508866141593598,
     0.01957166075207318,
     0.019609519394438325,
     0.01962179475977161,
     0.019609519394438325,
     0.01957166075207318,
     0.019508866141593598,
     0.01942123072843263,
     0.019308947017529412,
     0.019172162740592583,
     0.01901137598703197,
     0.018826999987647308,
     0.01861940141435486,
     0.01838902356524701,
     0.01813647125115341,
     0.01786238083860661,
     0.017567402022023854,
     0.01725219847146682,
     0.016917482329548313,
     0.016564054820840564,
     0.01619279264012354,
     0.015804588264146748,
     0.015400338031201282,
     0.01498101647815172,
     0.014547598785280282,
     0.01410106750832395,
     0.013642441780895263,
     0.013172878963222513,
     0.012693362997581694,
     0.012205096804603435,
     0.011708093823712695,
     0.011207367787045883,
     0.01069660143733805,
     0.010184084354358074,
     0.009668948623076993,
     0.009150952454600968,
     0.008631240834625018,
     0.008111607456162538,
     0.007593449769673476,
     0.007077836035071489,
     0.0065656141229951245,
     0.006057613317214209,
     0.005554741092409959,
     0.005058066423553038,
     0.004568697950419642,
     0.004087683851145636,
     0.003615966140281129,
     0.0031543572306565655,
     0.0027035648624877343,
     0.0022642922016364252,
     0.0018373207087292727,
     0.0014234342677589221,
     0.0010233719189827117,
     0.0006376813242390007,
     0.00026671048293896185,
     -0.0000892473562811366,
     -0.0004292970702961897,
     -0.0007524231429819083,
     -0.0010585067913991312,
     -0.0013492945649294067,
     -0.0016209640030349812,
     -0.0018761957153400509,
     -0.00211352970399029,
     -0.0023331829374547775,
     -0.0025350358230754695,
     -0.0027192001295324353,
     -0.002885642872277675,
     -0.0030344279733142783,
     -0.003165691817130507,
     -0.0032796081258571557,
     -0.0033763542168496075,
     -0.00345621373796425,
     -0.003519525566908044,
     -0.003566632302888172,
     -0.003597892718574346,
     -0.003613721268072974,
     -0.0036146038616615824,
     -0.003601080940219207,
     -0.003573702686940722,
     -0.003532953533382869,
     -0.003479385699232179,
     -0.003413656818301452,
     -0.003336449486802089,
     -0.00324825236398359,
     -0.0031497874675384518,
     -0.003041697039306195,
     -0.0029249547460977313,
     -0.0027993865980285577,
     -0.00266690017435714,
     -0.002527287833663975,
     -0.002381540094072683,
     -0.002230489310025245,
     -0.0020748646796718124,
     -0.0019151565029332264,
     -0.001752154318434934,
     -0.0015866631951609983,
     -0.0014193672760023062,
     -0.0012508436698857252,
     -0.0010817169459567918,
     -0.0009126354429600253,
     -0.0007442590091905181,
     -0.0005772089443642813,
     -0.0004120377757146344,
     -0.00024926544185230575,
     -0.00008942563762639262,
     0.0000669548620298512,
     0.00021940158417361813,
     0.00036742639375998037,
     0.0005105933234803262,
     0.0006485398152999211,
     0.0007809730726638415,
     0.0009074587212922,
     0.0010277029216358565,
     0.0011409211983264451,
     0.0012525215938752991,
     0.001342362316285495,
     0.0014411535605955985,
     0.0015293377917096392,
     0.0016068876674971424,
     0.0016757442148791937,
     0.0017373750571295728,
     0.0017922303635599021,
     0.0018402895419123925,
     0.0018812440215329833,
     0.0019147598332715644,
     0.0019406314141150864,
     0.0019588793495544936,
     0.001969649816541209,
     0.001973174755416274,
     0.001969728013003854,
     0.0019595712367353837,
     0.0019429540381374617,
     0.0019201692891223678,
     0.0018915648182749381,
     0.0018574658246454238,
     0.0018181526152067746,
     0.0017737716914853546,
     0.001724430461265395,
     0.0016703844602498261,
     0.0016122740460248236,
     0.0015504341864223043,
     0.0014847808529905682,
     0.0014152538149068907,
     0.0013438495160740201,
     0.0012689533594025912,
     0.0011922622867316381,
     0.0011136535615135132,
     0.0010335812596497876,
     0.0009523062265110934,
     0.000870277161388022,
     0.0007877892564508873,
     0.000705133391619707,
     0.0006226279819153048,
     0.0005405913923955102,
     0.00045925514581032556,
     0.000378859591784035,
     0.00029967700833921564,
     0.0002219771203020964,
     0.00014599342539527496,
     0.0000719192694666781,
     -6.653947450723948e-8,
     -0.0000697582046867073,
     -0.0001369112524187922,
     -0.00020137007261899283,
     -0.00026302308003315977,
     -0.0003217254816247227,
     -0.0003772605108028464,
     -0.00042963366025958297,
     -0.0004786983666658729,
     -0.0005245233867090431,
     -0.00056641969033693,
     -0.0006055809364516472,
     -0.0006406380978287092,
     -0.0006723205122973175,
     -0.0007006666256202271,
     -0.0007255005464266609,
     -0.0007466898038610989,
     -0.0007645079195236311,
     -0.0007790945975853025,
     -0.0007904512052608203,
     -0.0007985615438343808,
     -0.0008035202546359405,
     -0.000805463471141381,
     -0.0008045490536507538,
     -0.0008008960134012433,
     -0.0007945797718139921,
     -0.0007856896266741399,
     -0.000774361286844492,
     -0.0007607497282814199,
     -0.0007450124223546479,
     -0.00072734046615764,
     -0.0007078690575819224,
     -0.000686701060494037,
     -0.0006639488421795402,
     -0.0006398258232683617,
     -0.0006144098787837698,
     -0.0005879046685356189,
     -0.000560433515183372,
     -0.00522976969654727]
# Denominator coefficients: a single 1 means there are no feedback terms.
a = [1]

### Select sample start and end points
Run this block once. For each trial, the LFP and ACC data will be shown. First, input the offset from the start of the trial in LFP time steps. Second, input the offset from the end of the trial in LFP time steps. Last, type `y` to accept and `n` to reject the start and end points.

To reject a trial and remove it from the dataset, type `-1` for the start point and any integer for the end point, then confirm with `y`.

In [None]:
# RUN ONLY ONCE; this will overwrite lfp_data and acc_data

from IPython.display import clear_output


def _show_trial(lfp_diff0, lfp_diff1, acc_trace):
    """Plot both differential LFP traces and the accelerometer trace."""
    panels = (("LFP Recording Diff 0", lfp_diff0),
              ("LFP Recording Diff 1", lfp_diff1),
              ("ACC Recording", acc_trace))
    for title, trace in panels:
        plt.plot(trace)
        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel('Voltage')
        plt.show()


def _ask_int(prompt):
    """Prompt until the reply parses as an integer, then return it.

    A typo must not abort the cell: an exception here would discard every
    trial that has already been trimmed.
    """
    while True:
        reply = input(prompt)
        try:
            return int(reply)
        except ValueError:
            print('Please enter a whole number.')


# indices of recordings the user chose to discard
remove_indices = []

for i in range(recording_cnt):
    # get LFP differential channels. usable data is in the range [-0.005, 0.005]
    lfp_c1 = lfp_data[i][:, 1] - lfp_data[i][:, 0]
    lfp_c2 = lfp_data[i][:, 3] - lfp_data[i][:, 2]
    acc = acc_data[i]

    lfp_size = len(lfp_c1)
    acc_size = len(acc)

    accept = False
    notice = ''  # feedback shown under the plots on the next pass
    while not accept:
        clear_output(wait=True)
        _show_trial(lfp_c1, lfp_c2, acc)
        if notice:
            print(notice)
            notice = ''

        lfp_start_index = _ask_int('Start: ')

        if lfp_start_index == -1:
            # Rejecting the trial: the end value is requested to keep the
            # prompt sequence uniform, but its content is ignored.
            input('End: ')
            clear_output(wait=True)
            print('Throw out sample')
            accept = input('Accept? (y/n): ') == 'y'
            if accept:
                remove_indices.append(i)
            continue

        # The end point is entered as an offset counted back from the end.
        lfp_end_offset = _ask_int('End: ')
        lfp_end_index = lfp_size - lfp_end_offset
        if (lfp_start_index < 0 or lfp_end_offset < 0
                or lfp_start_index >= lfp_end_index):
            notice = ('Invalid range: offsets must be non-negative and '
                      'leave at least one sample.')
            continue

        # Map the LFP offsets onto the accelerometer trace by length ratio.
        acc_start_index = int(lfp_start_index * acc_size / lfp_size)
        acc_end_index = acc_size - int(lfp_end_offset * acc_size / lfp_size)
        clear_output(wait=True)

        # Preview the trimmed traces before asking for confirmation.
        _show_trial(lfp_c1[lfp_start_index:lfp_end_index],
                    lfp_c2[lfp_start_index:lfp_end_index],
                    acc[acc_start_index:acc_end_index])

        accept = input('Accept? (y/n): ') == 'y'
        if accept:
            lfp_data[i] = lfp_data[i][lfp_start_index:lfp_end_index, :]
            acc_data[i] = acc_data[i][acc_start_index:acc_end_index]

clear_output(wait=True)
# Drop the rejected recordings from both arrays so their indices stay aligned.
lfp_data = np.delete(lfp_data, obj=remove_indices, axis=0)
acc_data = np.delete(acc_data, obj=remove_indices, axis=0)

recording_cnt = np.shape(acc_data)[0]
print(str(recording_cnt) + ' samples loaded.')

### Set Training/Testing split
Set these values according to the output of the previous cell for an 80/20 training/testing split.

In [None]:
# Number of recordings assigned to the training and the testing set.
# Edit by hand after the selection cell reports how many samples were loaded.
train_recording_cnt, test_recording_cnt = 20, 4

#### Resample accelerometer data for KLMS and CNN

In [None]:
# parameters
# Net rate change is upsample_factor / downsample_factor = 125 / 259,
# which maps fs_acc = 518 onto fs_lfp = 250.
resampled_acc = []
upsample_factor = 125
downsample_factor = 259
ds_factor1, ds_factor2 = 37, 7  # the two decimation stages
assert ds_factor1 * ds_factor2 == downsample_factor

# resample accelerometer data
for i in range(recording_cnt):
    recording = acc_data[i]
    data_length = np.shape(recording)[0]
    # Normalised sample positions before and after upsampling; they are the
    # same for every channel of this recording.
    coarse_grid = np.linspace(0.0, 1.0, data_length, endpoint=False)
    fine_grid = np.linspace(0.0, 1.0, upsample_factor * data_length,
                            endpoint=False)

    channel_data = []
    for j in range(acc_ch_cnt):
        # linear interpolation onto the finer grid
        upsampled_acc = np.interp(fine_grid, coarse_grid, recording[:, j])
        # decimation in two stages (37, then 7)
        stage_one = signal.decimate(upsampled_acc, ds_factor1)
        channel_data.append(signal.decimate(stage_one, ds_factor2))

    # store a separate list per recording
    resampled_acc.append(list(channel_data))

#### Apply filtering and second downsampling

In [None]:
# parameters
filtered_acc = []      # per recording: filtered + normalised channels
downsampled_acc = []   # per recording: the same channels decimated by 50
filter_delay = int(len(b) / 2)
ds_factor1 = 10
ds_factor2 = 5
front_append = 1  # to line up with spectrogram data

# filter and downsample data
for i in range(recording_cnt):
    channel_data_fl = []
    channel_data_ds = []
    for j in range(acc_ch_cnt):
        # zero-phase FIR filtering (forward and backward pass)
        filtered_acc_t = signal.filtfilt(b, a, resampled_acc[i][j])

        # Min-max normalise to [0, 1]. A constant channel has zero span and
        # would divide by zero, so it is mapped to all zeros instead.
        fmin = np.min(filtered_acc_t)
        span = np.max(filtered_acc_t) - fmin
        if span == 0:
            filtered_acc_t = np.zeros_like(filtered_acc_t)
        else:
            filtered_acc_t = (filtered_acc_t - fmin) / span

        # Decimate by 50 in two stages (10, then 5): SciPy recommends
        # chaining calls for IIR decimation factors above 13.
        resampled_acc_t1 = signal.decimate(filtered_acc_t, ds_factor1)
        resampled_acc_t2 = signal.decimate(resampled_acc_t1, ds_factor2)

        # save channel data
        # Store the filtered signal; this list previously received the
        # unfiltered resampled_acc[i][j], contradicting its name.
        channel_data_fl.append(filtered_acc_t)
        # Drop the leading front_append sample(s) to line up with the
        # spectrogram windows.
        channel_data_ds.append(resampled_acc_t2[front_append:])
    # save recording data
    filtered_acc.append(channel_data_fl)
    downsampled_acc.append(channel_data_ds)

#### Apply spectrogram for CNN data

In [None]:
# parameters
spect_lfp = []      # per recording: list of (freq, time, channel) tensors
filtered_lfp = []   # per recording: band-passed differential channels
window_size = 100   # samples per spectrogram window (also used as nfft)
fft_size = 10       # nperseg handed to signal.spectrogram
overlap = 0.8       # fraction of fft_size shared by consecutive segments
offset_iter = 50    # hop between consecutive windows, in samples
segment_overlap = int(fft_size * overlap)

# calculate expected window size
# Only the output shape is needed, and it depends on the window length and
# the spectrogram parameters alone, so a zero window is used rather than a
# recording indexed by a loop variable left over from an earlier cell.
_, _, Sxx = signal.spectrogram(np.zeros(window_size),
                               fs_lfp,
                               nperseg=fft_size,
                               noverlap=segment_overlap,
                               nfft=window_size)
expected_img_size = np.shape(Sxx)

# convert to spectrogram "images"
for i in range(recording_cnt):
    recording = lfp_data[i]

    # combine differential channels once per recording
    lfp_diffs = [recording[:, 2 * j + 1] - recording[:, 2 * j]
                 for j in range(lfp_ch_cnt)]

    # Number of full windows that fit when stepping by offset_iter
    # (equals len // 50 - 1 for window_size = 100, offset_iter = 50).
    window_cnt = max(0, (len(recording) - window_size) // offset_iter + 1)

    # Band-pass the peak-normalised differential channels. As before,
    # recordings too short for a single window get no filtered data.
    filtered_channel_data = []
    if window_cnt > 0:
        for lfp_diff in lfp_diffs:
            lfp_signal = signal.filtfilt(
                b_bp, a_bp, lfp_diff / np.amax(np.absolute(lfp_diff)))
            filtered_channel_data.append(lfp_signal)

    channel_data = []
    for k in range(window_cnt):
        offset = k * offset_iter
        tensor_t = np.zeros((expected_img_size[0], expected_img_size[1],
                             lfp_ch_cnt))
        for j, lfp_diff in enumerate(lfp_diffs):
            # The spectrogram is taken from the unfiltered differential
            # signal, one window at a time.
            data_window = lfp_diff[offset:(offset + window_size)]
            _, _, Sxx = signal.spectrogram(data_window, fs_lfp,
                                           nperseg=fft_size,
                                           noverlap=segment_overlap,
                                           nfft=window_size)
            tensor_t[:, :, j] = Sxx
        # append tensor
        channel_data.append(tensor_t)

    # save recording data
    spect_lfp.append(channel_data)
    filtered_lfp.append(filtered_channel_data)

#### Divide into training and test set

In [None]:
# randomly choose recordings to go into train and test
# Guard against overlapping sets: slicing from both ends of the permutation
# silently shares recordings whenever train + test exceeds what is available.
if train_recording_cnt + test_recording_cnt > recording_cnt:
    raise ValueError(
        f'train_recording_cnt + test_recording_cnt = '
        f'{train_recording_cnt + test_recording_cnt} exceeds the '
        f'{recording_cnt} recordings available')

indices = np.random.permutation(recording_cnt)
train_indices = indices[:train_recording_cnt]
# Take the test recordings right after the training ones. A negative slice
# (indices[-n:]) would select every recording when n is 0.
test_indices = indices[train_recording_cnt:
                       train_recording_cnt + test_recording_cnt]


def _stack_recordings(recordings):
    """Return *recordings* as an array, using dtype=object when ragged.

    Trimmed recordings usually differ in length; recent NumPy versions raise
    ValueError for such ragged input unless dtype=object is requested.
    """
    try:
        return np.array(recordings)
    except ValueError:
        return np.array(recordings, dtype=object)


# split data
lfp_train = _stack_recordings([filtered_lfp[i] for i in train_indices])
lfp_test = _stack_recordings([filtered_lfp[i] for i in test_indices])

spect_train = _stack_recordings([spect_lfp[i] for i in train_indices])
spect_test = _stack_recordings([spect_lfp[i] for i in test_indices])

# The "normal" accelerometer sets hold the resampled, unfiltered signal.
normal_acc_train = _stack_recordings([resampled_acc[i][:] for i in train_indices])  # filtered_acc
normal_acc_test = _stack_recordings([resampled_acc[i][:] for i in test_indices])  # filtered_acc

downsampled_acc_train = _stack_recordings([downsampled_acc[i][:] for i in train_indices])
downsampled_acc_test = _stack_recordings([downsampled_acc[i][:] for i in test_indices])

#### Write out data

In [None]:
# Save every split as its own .npy file, in this order. allow_pickle=True is
# passed on each call, exactly as before.
outputs = (
    ('../data/lfp_train.npy', lfp_train),
    ('../data/lfp_test.npy', lfp_test),
    ('../data/spect_train.npy', spect_train),
    ('../data/spect_test.npy', spect_test),
    ('../data/normal_acc_train.npy', normal_acc_train),
    ('../data/normal_acc_test.npy', normal_acc_test),
    ('../data/downsampled_acc_train.npy', downsampled_acc_train),
    ('../data/downsampled_acc_test.npy', downsampled_acc_test),
)

for path, array in outputs:
    with open(path, 'wb') as f:
        np.save(f, array, allow_pickle=True)

print("Files written!")