# Data Cleaning

## Import

In [38]:
import os
import pathlib
import matplotlib

import numpy as np
import matplotlib.pyplot as plt
import mne
import mne_bids
from mne.preprocessing import (ICA, create_eog_epochs, 
                               compute_proj_eog, create_ecg_epochs, corrmap)
                               
matplotlib.use('Qt5Agg')
mne.set_log_level('warning') #All mne functions will only spit out output if they have reason to.

## Insert Subject Information

In [39]:
subject = '01'
session = '03'
task = 'WM1isi1250'
intername = 'sub-'+subject+'_ses-'+session+'_task-'+task+'_interpolated_data.fif'
oldepochs = 'sub-'+subject+'_ses-'+session+'_task-'+task+'_epochs-epo.fif'
filename = 'sub-'+subject+'_ses-'+session+'_task-'+task+'_cleaned_data.fif'
filename

'sub-01_ses-03_task-WM1isi1250_cleaned_data.fif'

## Input Raw Data

In [40]:
raw = mne.io.read_raw(pathlib.Path('out_data') / '02_interpolation_data' / 
                      intername)
events, event_id = mne.events_from_annotations(raw) 
raw.load_data()

  raw = mne.io.read_raw(pathlib.Path('out_data') / '02_interpolation_data' /


0,1
Measurement date,"November 10, 2019 20:36:50 GMT"
Experimenter,mne_anonymize
Digitized points,0 points
Good channels,"127 EEG, 2 EOG"
Bad channels,
EOG channels,"E8, E25"
ECG channels,Not available
Sampling frequency,1000.00 Hz
Highpass,0.00 Hz
Lowpass,500.00 Hz


## ICA Analysis

### Apply a High Pass Filter

In [41]:
high_pass_raw = raw.filter(l_freq=1, h_freq=None) 

### Run ICA

In [42]:
ica = ICA(n_components=15, max_iter='auto', random_state=97)
ica.fit(high_pass_raw)
ica

0,1
Method,fastica
Fit,67 iterations on raw data (561849 samples)
ICA components,15
Explained variance,99.9 %
Available PCA components,127
Channel types,eeg
ICA components marked for exclusion,—


### Evaluate ICA Components

In [43]:
ica.plot_components()

[<MNEFigure size 975x731 with 15 Axes>]

### Reject Eye Blinks Component

In [44]:
ica.exclude = [6] # Input new component number here!

### Create New Raw Data

In [45]:
reconst_raw = raw.copy()
ica.apply(reconst_raw)

0,1
Measurement date,"November 10, 2019 20:36:50 GMT"
Experimenter,mne_anonymize
Digitized points,0 points
Good channels,"127 EEG, 2 EOG"
Bad channels,
EOG channels,"E8, E25"
ECG channels,Not available
Sampling frequency,1000.00 Hz
Highpass,1.00 Hz
Lowpass,500.00 Hz


### Evaluate ICA Component Removal

## Signal Space Projection

### Filter Raw Data

In [46]:
filter_raw = reconst_raw.filter(l_freq=0.1, h_freq=40) 
filter_raw

0,1
Measurement date,"November 10, 2019 20:36:50 GMT"
Experimenter,mne_anonymize
Digitized points,0 points
Good channels,"127 EEG, 2 EOG"
Bad channels,
EOG channels,"E8, E25"
ECG channels,Not available
Sampling frequency,1000.00 Hz
Highpass,1.00 Hz
Lowpass,40.00 Hz


### Compute EOG Projector 

In [47]:
eog_projs, _ = compute_proj_eog(filter_raw, n_grad=0, n_mag=0, n_eeg=3, reject=None,
                                no_proj=True)
eog_projs

[<Projection | EOG-eeg--0.200-0.200-PCA-01, active : False, n_channels : 127, exp. var : 99.76%>,
 <Projection | EOG-eeg--0.200-0.200-PCA-02, active : False, n_channels : 127, exp. var : 0.24%>,
 <Projection | EOG-eeg--0.200-0.200-PCA-03, active : False, n_channels : 127, exp. var : 0.00%>]

### Visualize Scalp Distribution

In [48]:
mne.viz.plot_projs_topomap(eog_projs, info=raw.info)

<MNEFigure size 585x260 with 3 Axes>

### Apply Signal Space Projectors

In [49]:
reconst_raw.add_proj(eog_projs)
reconst_raw.apply_proj()

0,1
Measurement date,"November 10, 2019 20:36:50 GMT"
Experimenter,mne_anonymize
Digitized points,0 points
Good channels,"127 EEG, 2 EOG"
Bad channels,
EOG channels,"E8, E25"
ECG channels,Not available
Sampling frequency,1000.00 Hz
Highpass,1.00 Hz
Lowpass,40.00 Hz


## Epoch Comparison

### Input Old Epochs

In [50]:
old_epochs = mne.read_epochs(pathlib.Path('out_data')
            / '03_epochs' / oldepochs)
old_epochs

0,1
Number of events,527
Events,TsD-: 131 WMD+: 132 bgin: 132 fix+: 132
Time range,-0.250 – 0.800 sec
Baseline,-0.250 – 0.000 sec


#### Rejection Filter

In [51]:
reject_criteria = dict(eeg=100e-6) # 100 µV, what should our rejection criteria be? 

old_epochs.drop_bad(reject=reject_criteria)

0,1
Number of events,329
Events,TsD-: 82 WMD+: 92 bgin: 81 fix+: 74
Time range,-0.250 – 0.800 sec
Baseline,-0.250 – 0.000 sec


### Run New Epochs

In [52]:
tmin = -0.25 #300 ms before the event
tmax = 0.8 #500 ms after the onset

new_epochs = mne.Epochs(reconst_raw,
                    events=events,
                    event_id=event_id,
                    tmin=tmin,
                    tmax=tmax,
                    preload=True)
new_epochs

0,1
Number of events,527
Events,TsD-: 131 WMD+: 132 bgin: 132 fix+: 132
Time range,-0.250 – 0.800 sec
Baseline,-0.250 – 0.000 sec


In [53]:
reject_criteria = dict(eeg=100e-6) # 100 µV, what should our rejection criteria be? 

new_epochs.drop_bad(reject=reject_criteria)

0,1
Number of events,498
Events,TsD-: 125 WMD+: 125 bgin: 125 fix+: 123
Time range,-0.250 – 0.800 sec
Baseline,-0.250 – 0.000 sec


## Result

In [54]:
old_epochs.plot(title = 'Old Epochs')
new_epochs.plot(title = 'New Epochs')

<mne_qt_browser._pg_figure.MNEQtBrowser at 0x28d1db5a5e0>

## Saving Cleaned Epoch Data 

In [55]:
new_epochs.save(pathlib.Path('out_data')
            / '05_cleaned_data' / filename)

  new_epochs.save(pathlib.Path('out_data')
