In [1]:
import os
from os import path
from glob import glob

import numpy as np
import pandas as pd
from scipy import signal
import json
import matplotlib.pyplot as plt
from matplotlib import cm

%matplotlib widget

In [2]:
# Root folder of one recording session.
data_dir = '../data/eeg_pilot/mouse497154/pilot2_2020.01.15/recording1/'

# Glob patterns (note the trailing '*') matching every file of each data stream.
eeg_cont_dir, eeg_evnt_dir, eeg_mesg_dir = (
    path.join(data_dir, pattern)
    for pattern in (
        'continuous/Rhythm_FPGA-111.0/*',
        'events/Rhythm_FPGA-111.0/TTL_1/*',
        'events/Message_Center-904.0/TEXT_group_1/*',
    )
)

# Data streams from EEG recording

## Metadata
Extracted from the `structure.oebin` file; the start time and sample rate are read from `sync_messages.txt`.

In [3]:
# Load the recording description (JSON).
with open(path.join(data_dir, 'structure.oebin'), 'r') as f:
    metadata = json.load(f)

# Per-channel scale factor, keyed by the channel's recorded index (0-based).
# Multiplying raw integer samples by it gives the channel's `units`.
BIT_VOLTS = {
    ch['recorded_processor_index']: ch['bit_volts']
    for ch in metadata['continuous'][0]['channels']
}

with open(path.join(data_dir, 'sync_messages.txt'), 'r') as f:
    lines = f.readlines()

# The text after the last ':' on the second line is expected to look like
# '<start sample>@<sample rate>Hz'. Use strip()/rstrip() rather than fixed
# slices so a missing trailing newline or stray whitespace cannot silently
# chop digits off the sample rate.
start_time = lines[1].split(':')[-1].strip()
start_sample, _, rate = start_time.partition('@')
START_TIME = int(start_sample)
SAMPLE_RATE = int(rate.strip().rstrip('Hz'))

print('Start time: %d\nSample rate: %d Hz\nAll metadata:'%(START_TIME, SAMPLE_RATE))
metadata

Start time: 9400320
Sample rate: 10000 Hz
All metadata:


{'GUI version': '0.4.5',
 'continuous': [{'folder_name': 'Rhythm_FPGA-111.0/',
   'sample_rate': 10000,
   'source_processor_name': 'Rhythm FPGA',
   'source_processor_id': 111,
   'source_processor_sub_idx': 0,
   'recorded_processor': 'Rhythm FPGA',
   'recorded_processor_id': 111,
   'num_channels': 32,
   'channels': [{'channel_name': 'CH1',
     'description': 'Headstage data channel',
     'identifier': 'genericdata.continuous',
     'history': 'Rhythm FPGA',
     'bit_volts': 0.19499999284744263,
     'units': 'uV',
     'source_processor_index': 0,
     'recorded_processor_index': 0},
    {'channel_name': 'CH2',
     'description': 'Headstage data channel',
     'identifier': 'genericdata.continuous',
     'history': 'Rhythm FPGA',
     'bit_volts': 0.19499999284744263,
     'units': 'uV',
     'source_processor_index': 1,
     'recorded_processor_index': 1},
    {'channel_name': 'CH3',
     'description': 'Headstage data channel',
     'identifier': 'genericdata.continuous',
 

In [4]:
# Show which files each stream's glob pattern matches.
for heading, pattern in (
    ('Continuous datasets:', eeg_cont_dir),
    ('Events datasets:', eeg_evnt_dir),
    ('Message datasets:', eeg_mesg_dir),
):
    print(heading)
    print([path.basename(match) for match in glob(pattern)])

Continuous datasets:
['continuous.dat', 'timestamps.npy']
Events datasets:
['timestamps.npy', 'channel_states.npy', 'full_words.npy', 'channels.npy']
Message datasets:
['timestamps.npy', 'text.npy', 'channels.npy']


## Message dataset

In [5]:
dataset = eeg_mesg_dir

# One column per file in the folder, named after the file (without '.npy').
message_arrays = {
    path.basename(npy_file).replace('.npy', ''): np.load(npy_file)
    for npy_file in glob(dataset)
}

# Timestamps become seconds since recording start and serve as the index;
# the text column is decoded from ASCII bytes to str.
messages = (
    pd.DataFrame(message_arrays)
    .assign(
        timestamps=lambda df: (df['timestamps'] - START_TIME) / SAMPLE_RATE,
        text=lambda df: df['text'].map(lambda raw: raw.decode('ASCII')),
    )
    .set_index('timestamps')
)
messages

Unnamed: 0_level_0,text,channels
timestamps,Unnamed: 1_level_1,Unnamed: 2_level_1
727.5008,isoo on T 5%,1
931.2768,iso on 2%,1
1689.344,iso oFF,1


## Events dataset

In [6]:
dataset = eeg_evnt_dir

# One column per file in the folder, named after the file (without '.npy').
event_arrays = {
    path.basename(npy_file).replace('.npy', ''): np.load(npy_file)
    for npy_file in glob(dataset)
}

# Timestamps become seconds since recording start and serve as the index.
events = (
    pd.DataFrame(event_arrays)
    .assign(timestamps=lambda df: (df['timestamps'] - START_TIME) / SAMPLE_RATE)
    .set_index('timestamps')
)
events

Unnamed: 0_level_0,channel_states,full_words,channels
timestamps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9.8399,8,128,8
9.8599,-8,0,8
9.8799,8,128,8
9.9089,-8,0,8
9.9958,8,128,8
...,...,...,...
3444.1338,-8,0,8
3444.1918,8,128,8
3444.2787,-8,0,8
3444.3657,8,128,8


### Timestamps for synchronization
These TTL event timestamps are to be used for synchronization. See https://github.com/open-ephys/sync-barcodes/blob/master/barcodes.py.

In [7]:
# Plot every events column against time on one wide axis.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(14, 2), tight_layout=True)
events.plot(ax=ax)
# Put the legend just outside the right edge of the axes.
ax.legend(loc=(1.01, 0), fontsize=10);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Continuous dataset

In [8]:
dataset = eeg_cont_dir
cont_dir = path.dirname(dataset)  # drop the trailing '*' of the glob pattern

# Sample times in seconds since recording start.
timestamps = (np.load(path.join(cont_dir, 'timestamps.npy')) - START_TIME) / SAMPLE_RATE

# Raw samples: little-endian int16, reshaped below to (n_t, n_c) like the
# original cell, i.e. one row per timestamp.
raw_samples = np.fromfile(path.join(cont_dir, 'continuous.dat'), dtype='<i2')

n_t = int(timestamps.shape[0])
if n_t == 0:
    raise ValueError('timestamps.npy is empty; cannot infer the channel count')
# Integer arithmetic, so a size mismatch is reported instead of being hidden
# by float division and truncation.
n_c, leftover = divmod(raw_samples.shape[0], n_t)
if leftover:
    raise ValueError(
        'continuous.dat holds %d samples, which is not a multiple of the %d timestamps'
        % (raw_samples.shape[0], n_t)
    )

# Scale each channel with its own factor from the metadata rather than a
# hard-coded 0.195; BIT_VOLTS is keyed by 0-based recorded channel index.
bit_volts = np.array([BIT_VOLTS[ch] for ch in range(n_c)])

data = pd.DataFrame(
    data=raw_samples.reshape(n_t, n_c) * bit_volts,
    columns=range(1, n_c + 1),  # 1-based channel numbers (CH1, CH2, ...)
    index=timestamps,
)
data

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,32
0.0000,-63.570,-43.875,-61.035,-35.295,-39.390,-25.545,-45.435,-29.835,-631.995,-1.755,...,273.000,1261.260,-46.410,-31.980,-45.825,-46.995,-70.590,-69.420,-66.690,-77.610
0.0001,-61.815,-44.460,-54.015,-23.400,-38.610,-23.595,-43.485,-27.885,-522.600,1.950,...,405.600,1405.950,-38.220,-38.025,-47.580,-41.925,-68.445,-66.300,-67.860,-72.540
0.0002,-57.135,-31.200,-50.310,-19.890,-35.685,-26.715,-46.215,-25.350,-402.870,-1.755,...,510.120,1528.605,-39.975,-30.225,-46.020,-35.100,-67.080,-63.180,-67.470,-76.635
0.0003,-47.190,-31.980,-47.970,-15.405,-28.470,-14.235,-42.510,-27.300,-300.105,-0.585,...,609.960,1637.805,-34.320,-27.885,-47.190,-30.225,-61.035,-56.940,-55.185,-66.300
0.0004,-47.385,-34.320,-37.830,-7.605,-26.910,-11.895,-36.270,-22.620,-185.250,7.020,...,724.230,1776.840,-27.885,-22.815,-29.250,-26.325,-48.165,-47.580,-39.390,-49.725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3471.7435,126.750,110.175,64.935,61.425,59.475,81.705,66.885,64.350,-2615.730,55.575,...,-2392.455,-3503.760,54.600,42.510,55.380,49.335,74.490,76.635,116.805,116.025
3471.7436,109.200,93.210,54.600,46.020,48.165,62.790,57.525,60.255,-2833.155,63.180,...,-2615.340,-3732.300,46.800,34.515,50.310,39.975,66.495,68.835,101.985,112.515
3471.7437,96.525,78.780,44.070,24.180,38.805,47.190,48.945,47.775,-3044.535,63.765,...,-3037.905,-4218.435,30.615,25.740,37.830,32.955,49.335,56.355,64.545,87.750
3471.7438,87.165,65.715,46.410,31.005,41.925,45.045,51.285,45.630,-3761.940,54.990,...,-4068.285,-5346.315,38.610,36.075,38.805,34.320,49.335,68.835,63.375,100.035


# Processing EEG signals
[This](http://ims.mf.uni-lj.si/archive/15(1)/21.pdf) might be of interest, and maybe also some papers mentioned in [this](https://www.researchgate.net/post/High_frequency_component_of_EEG_signal_noise_or_information) thread.  

## Visualize raw signals
* The y-axis is in $\mu$V.
* Ch 1 and 2 are two regular electrodes with proper connections.
* Ch 9 is disconnected. **What is the (5mV) signal there?**
* **What are the big spikes (~2mV) in ch 1 and 2?**

In [9]:
# Channels to show (1-based column labels of `data`), one stacked panel each.
channels = [1, 2, 9]
fig, axes = plt.subplots(
    len(channels), 1,
    squeeze=False, sharex=True,
    figsize=(14, 2*len(channels)), tight_layout=True,
)
for ch, ax in zip(channels, axes[:, 0]):
    data[ch].plot(ax=ax, label='CH%d' % ch)
    ax.set_xlim(226, 227)
    # Channel 9 keeps its automatic y-range; the others are clipped.
    if ch != 9:
        ax.set_ylim(-500, 500)
    ax.legend(loc=1)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Power spectra of raw signals

In [10]:
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(8, 3.5), tight_layout=True)

# Left panel: channel 1 under each detrending option.
for detrend_mode in (None, 'mean', 'linear'):
    ax1.psd(data[1], Fs=SAMPLE_RATE, NFFT=8192, detrend=detrend_mode, label=str(detrend_mode))
ax1.set_title('Compare detrending')

# Right panel: linearly detrended spectra of three channels.
for ch in (1, 2, 9):
    ax2.psd(data[ch], Fs=SAMPLE_RATE, NFFT=8192, detrend='linear', label='ch%d' % ch)
ax2.set_title('Compare channels')

for panel in (ax1, ax2):
    panel.set_xlim(0, 125)
    panel.set_ylim(0, 70)
    panel.legend(loc=1)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Low pass filter and remove 60 Hz
For now, I have tried setting the low-pass cutoff between 125 and 250 Hz. 125 Hz might be too close to the 30-100 Hz gamma band, but within the range I tried it is the only integer factor of 10,000 below 200, which is why I use it below.

In [11]:
# Cutoff is a fixed fraction of the sample rate: 125 Hz when SAMPLE_RATE is 10000.
lp_freq = SAMPLE_RATE/80
# 4th-order Butterworth low-pass, applied with filtfilt.
b, a = signal.butter(N=4, Wn=lp_freq, btype='low', fs=SAMPLE_RATE)
lp_signals = {}
for ch in (1, 2, 9):
    lp_signals[ch] = signal.filtfilt(b, a, data[ch])

In [16]:
# Line-noise frequencies to remove: 60 Hz and its harmonics.
notch_freq = [60, 120, 180, 240]
width = 4 # filter width in Hz

# Start from the low-pass filtered signals and apply the notches one by one.
f_signals = dict(lp_signals)
for freq in notch_freq:
    # Q is centre frequency / bandwidth, so it must scale with `freq` to keep
    # every notch `width` Hz wide. A fixed Q of 60/width would widen the
    # notches at the harmonics to 8, 12 and 16 Hz.
    b, a = signal.iirnotch(freq, Q=freq/width, fs=SAMPLE_RATE)
    # Build a new dict rather than assigning into the one being iterated.
    f_signals = {ch: signal.filtfilt(b, a, sig) for ch, sig in f_signals.items()}

In [17]:
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(8, 3.5), tight_layout=True)
psd_channels = (1, 2, 9)

# Left panel: low-pass filtered spectra, then the raw spectra as thin lines.
for ch in psd_channels:
    ax1.psd(lp_signals[ch], Fs=SAMPLE_RATE, NFFT=8192, detrend='linear', label='ch%d' % ch)
for ch in psd_channels:
    ax1.psd(data[ch], Fs=SAMPLE_RATE, NFFT=8192, detrend='linear', lw=0.2)
ax1.set_title('After low-pass @%d' % lp_freq)

# Right panel: same comparison for the low-pass + notch filtered signals.
for ch in psd_channels:
    ax2.psd(f_signals[ch], Fs=SAMPLE_RATE, NFFT=8192, detrend='linear', label='ch%d' % ch)
for ch in psd_channels:
    ax2.psd(data[ch], Fs=SAMPLE_RATE, NFFT=8192, detrend='linear', lw=0.2)
ax2.set_title('And also removing line noise')

for panel in (ax1, ax2):
    panel.set_xlim(0, 400)
    panel.set_ylim(0, 70)
    panel.legend(loc=1)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Example signals after low-pass filtering

In [14]:
# Channels to show (1-based column labels of `data`), one stacked panel each.
channels = [1, 2, 9]
fig, axes = plt.subplots(
    len(channels), 1,
    squeeze=False, sharex=True,
    figsize=(14, 2*len(channels)), tight_layout=True,
)
for ch, ax in zip(channels, axes[:, 0]):
    # Raw trace (thin, dashed) drawn first, low-pass filtered trace on top.
    data[ch].plot(ax=ax, ls='--', lw=0.5)
    ax.plot(data.index, lp_signals[ch], label='CH%d' % ch)
    ax.legend(loc=1)
    ax.set_xlim(226, 226.5)
    # Channel 9 keeps its automatic y-range; the others are clipped.
    if ch != 9:
        ax.set_ylim(-600, 600)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Signal post low-pass and line noise removal

In [19]:
# Channels to show (1-based column labels of `data`), one stacked panel each.
channels = [1, 2, 9]
fig, axes = plt.subplots(
    len(channels), 1,
    squeeze=False, sharex=True,
    figsize=(14, 2*len(channels)), tight_layout=True,
)
for ch, ax in zip(channels, axes[:, 0]):
    # Raw trace (thin, dashed) drawn first, low-pass + notch filtered trace on top.
    data[ch].plot(ax=ax, ls='--', lw=0.5)
    ax.plot(data.index, f_signals[ch], label='CH%d' % ch)
    ax.legend(loc=1)
# Axis limits are left at their defaults here. To zoom, set
# ax.set_xlim(226, 226.5) and, for channels other than 9, ax.set_ylim(-600, 600).

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Disconnected channels

In [12]:
# `ch9` was never defined in this notebook, so the cell failed on a fresh
# kernel. Take it from the loaded data.
# NOTE(review): assumes `ch9` was meant to be the raw channel-9 trace; confirm.
ch9 = data[9]

fig, ax = plt.subplots(1, 1, figsize=(14, 2), tight_layout=True)
ch9.plot(ax=ax, label='CH9')
ax.set_xlim(18, 19)
ax.legend(loc=1);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …