In [1]:
import os
import warnings
import numpy as np
import luigi
import emu.neuralynx_io as nlx
from emu.pdil.raw import Electrophysiology,Participant,get_data_manifest,points_to_choice
from emu.neuralynx_io import nev_as_records
from emu.nwb import nlx_to_nwb
from emu.pipeline.remote import RemoteCSV
from pynwb import TimeSeries, NWBFile,NWBHDF5IO
from pynwb.ecephys import ElectricalSeries
from pynwb.misc import AnnotationSeries
import pandas as pd
import datetime
import glob

from tqdm import tqdm_notebook as tqdm

# End-to-end example for converting raw data to nwb

In [2]:
# Data manifest; passed to Participant as `raw_files` in a later cell
all_files = get_data_manifest()

In [3]:
# Local directory holding the raw sEEG files for patient 1.
# NOTE(review): absolute, user-specific path — adjust when running on another machine.
seeg_root = '/home/elijahc/.emu/pdil/pt_01/SEEG/raw'

# Derive both locations from seeg_root so the directory is spelled out only once
nev_path = os.path.join(seeg_root, 'PO_Day_02.Events.nev')
ncs_paths = sorted(glob.glob(os.path.join(seeg_root, 'PO_Day_02.*.ncs')))

print('nev path: ',nev_path)
print('ncs_paths:')
# Preview only the first ten channel files; the loop variable is `ncs_path`
# (not `p`) so it cannot be confused with the Participant object named `p`
for ncs_path in ncs_paths[:10]:
    print(ncs_path)

nev path:  /home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.Events.nev
ncs_paths:
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC100_0005.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC100_0006.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC100_0007.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC101_0005.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC101_0006.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC101_0007.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC102_0005.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC102_0006.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC102_0007.ncs
/home/elijahc/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.CSC103_0005.ncs


In [4]:
# Bundle the manifest and the local sEEG directory into a Participant for patient 1
p = Participant(
    patient_id=1,
    raw_files=all_files,
    seeg_raw_path=seeg_root,
)



The [Participant](https://github.com/elijahc/emu/blob/3b240fbe8bfd4363ceadcf152dc6811c63493a3a/emu/pdil/raw.py#L221) class provides helper functions for managing all the raw data collected from a single patient

`Participant.cache_behavior()` is a generator that yields one `BehaviorRaw` luigi task for each raw behavior file to fetch.

- `BehaviorRaw.output().path` stores the path to where its file *should* exist locally.
- `BehaviorRaw.output().exists()` will return True if the file specified by `path` exists.

```python
def cache_behavior(self,verbose=False):
    for i,row in self.behavior_files.iterrows():
        t = BehaviorRaw(
            patient_id=row.patient_id,
            file_id=row.id,
            file_name=row.filename,
            save_to=self.behavior_raw_path,
        )
        yield t
```

- `Participant.load_game_data()` and `Participant.load_pdil_events()` will load mat files containing outcomes from the [pdil game](https://github.com/elijahc/emu/tree/master/PDil) implemented in psych toolbox as well as tic-toc timing of each screen and keypress which we'll sync to ephys data.

- `Participant.load_pdil_events()` and `Participant.load_game_data()` are generators that both use `cache_behavior()` to ensure all behavior files have been downloaded before trying to extract data from them.

```python
def load_pdil_events(self):
    tasks = list(self.cache_behavior())
    missing_tasks = [t for t in tasks if not t.output().exists()]
    print('{} missing tasks'.format(len(missing_tasks)))

    if len(missing_tasks) > 0:
        luigi.build(missing_tasks,local_scheduler=local_scheduler)

   ...
```
 
 - Both behavior load functions are python generators which will `yield` a pandas DataFrame for each block
 
 - Wrap the function call in `pd.concat()` to get a single DataFrame combining all blocks

In [5]:
# load_pdil_events() yields one DataFrame per block; concatenate them and preview the first rows
pd.concat(p.load_pdil_events()).head()

0 missing tasks


Unnamed: 0,event,event_delta,screen,trial,block,ttl_delta
0,trial_start,0.0,,0,3,0.0
1,render_screen1,1.569078,1.0,1,3,1.569078
2,keypress1,2.035263,1.0,1,3,3.604341
3,render_screen2,1.613659,2.0,1,3,5.218
4,keypress2,1.890656,2.0,1,3,7.108656


- `Participant.cache_nev()` and `Participant.cache_ncs()` are the sEEG analogues of `cache_behavior()` and basically do the same thing for the neuralynx channel files (.ncs) which store lfp traces and event files (.nev) which store timestamped ttls sent by the pdil task.

In [6]:
# Select the download tasks that belong to the PO_Day_02 recording.
# Only files whose name also contains '0007' are kept.
def _is_d2_target(task):
    """Return True when the task's file name contains both 'PO_Day_02' and '0007'."""
    return 'PO_Day_02' in task.file_name and '0007' in task.file_name


# Channel (.ncs) download tasks and their output paths
d2_ncs_tasks = [t for t in p.cache_ncs() if _is_d2_target(t)]
d2_ncs_paths = [t.output().path for t in d2_ncs_tasks]

print([t.file_name for t in d2_ncs_tasks[:10]],'\n')


# Event (.nev) download task for the same recording; the first match is used
d2_nev_tasks = [t for t in p.cache_nev() if _is_d2_target(t)]
if not d2_nev_tasks:
    # Same exception type as indexing an empty list, but with an actionable message
    raise IndexError("no .nev task matches 'PO_Day_02' and '0007'")
d2_nev = d2_nev_tasks[0]
print(d2_nev)

['PO_Day_02.CSC100_0007.ncs', 'PO_Day_02.CSC101_0007.ncs', 'PO_Day_02.CSC102_0007.ncs', 'PO_Day_02.CSC103_0007.ncs', 'PO_Day_02.CSC104_0007.ncs', 'PO_Day_02.CSC105_0007.ncs', 'PO_Day_02.CSC106_0007.ncs', 'PO_Day_02.CSC107_0007.ncs', 'PO_Day_02.CSC108_0007.ncs', 'PO_Day_02.CSC109_0007.ncs'] 

NLXRaw(file=/.emu/pdil/pt_01/SEEG/raw/PO_Day_02.Events_0007.nev)


In [7]:
# Build the NWB file object from the selected event file and channel files
nwb = p.create_nwb(
    d2_nev.output().path,
    d2_ncs_paths,
    blocks=[0,1,2],
    desc='Patient 1 | Post-op Day 2',
)

compressing channels: 100%|██████████| 176/176 [02:10<00:00,  1.35it/s]


0 missing tasks
0 missing tasks


In [8]:
# Show the NWBFile summary (its acquisition series and other fields) as the cell output
nwb

root pynwb.file.NWBFile at 0x140607860210880
Fields:
  acquisition: {
    C100 <class 'pynwb.base.TimeSeries'>,
    ttl <class 'pynwb.misc.AnnotationSeries'>,
    wire_10_electrode_1 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_10 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_2 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_3 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_4 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_5 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_6 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_7 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_8 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_10_electrode_9 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_11_electrode_1 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_11_electrode_10 <class 'pynwb.ecephys.ElectricalSeries'>,
    wire_11_electrode_11 <class 'pynwb.ecephys.

In [9]:
# Preview the first rows of the trials table as a DataFrame
nwb.trials.to_dataframe().head()

Unnamed: 0_level_0,start_time,stop_time,outcome
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,218.069584,256.902164,C-C
1,256.918818,288.060886,D-C
2,288.068212,311.112146,C-D
3,311.117797,330.989862,C-C
4,331.000757,356.371729,D-C


In [10]:
# Write the NWB file into the `processed` directory that sits beside the raw directory.
processed_dir = os.path.join(seeg_root,'..','processed')
# No visible cell creates this directory, so make sure it exists before opening the file
os.makedirs(processed_dir, exist_ok=True)
with NWBHDF5IO(os.path.join(processed_dir,'PO_Day_02.nwb'),'w') as io:
    io.write(nwb)



In [11]:
from emu.luigi.box import BoxClient

In [12]:
# Inspect the 'ttl' AnnotationSeries: event labels in `data` with matching `timestamps`
nwb.acquisition['ttl']

ttl pynwb.misc.AnnotationSeries at 0x140607850182192
Fields:
  comments: no comments
  conversion: 1.0
  data: ['block_start' 'block_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start' 'trial_start'
 'block_start' 'block_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'block_start' 'block_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start' 'trial_start'
 'trial_start' 'trial_start' 'trial_start' 'trial_start']
  description: no description
  interval: 1
  resolution: -1.0
  timestamps: [ 166.02063608  166.23729205  218.06958389  256.918818    288.06821203
  311.1177969   331.00075698  356.38362193  375.11661506  395.88288689
  491.21444297  491.43109894  510.847404    

In [13]:
import scipy.io as sio

In [14]:
# Keep only the acquisition entries whose name starts with 'channel' or 'wire'
channels = [
    name
    for name in nwb.acquisition.keys()
    if name.startswith(('channel', 'wire'))
]

In [15]:
# List each channel's data shape next to its name to check that the channels agree.
# NOTE(review): the saved output shows wire_10_electrode_5 with shape (142592,)
# while the other listed channels are (1889941,) — confirm before exporting.
[(nwb.acquisition[c].data.shape,c) for c in channels]

[((1889941,), 'wire_9_electrode_3'),
 ((1889941,), 'wire_9_electrode_4'),
 ((1889941,), 'wire_9_electrode_5'),
 ((1889941,), 'wire_9_electrode_6'),
 ((1889941,), 'wire_9_electrode_7'),
 ((1889941,), 'wire_9_electrode_8'),
 ((1889941,), 'wire_9_electrode_9'),
 ((1889941,), 'wire_9_electrode_10'),
 ((1889941,), 'wire_9_electrode_11'),
 ((1889941,), 'wire_1_electrode_10'),
 ((1889941,), 'wire_9_electrode_12'),
 ((1889941,), 'wire_9_electrode_13'),
 ((1889941,), 'wire_9_electrode_14'),
 ((1889941,), 'wire_9_electrode_15'),
 ((1889941,), 'wire_9_electrode_16'),
 ((1889941,), 'wire_10_electrode_1'),
 ((1889941,), 'wire_10_electrode_2'),
 ((1889941,), 'wire_10_electrode_3'),
 ((1889941,), 'wire_10_electrode_4'),
 ((142592,), 'wire_10_electrode_5'),
 ((1889941,), 'wire_10_electrode_6'),
 ((1889941,), 'wire_10_electrode_7'),
 ((1889941,), 'wire_10_electrode_8'),
 ((1889941,), 'wire_10_electrode_9'),
 ((1889941,), 'wire_10_electrode_10'),
 ((1889941,), 'wire_11_electrode_1'),
 ((1889941,), 'wire

In [16]:

def nwb_to_mat(out_mat,compress=True,nwbfile=None):
    """Save the data of every acquisition series to a MATLAB ``.mat`` file.

    Parameters
    ----------
    out_mat : str
        Destination path handed to ``scipy.io.savemat``.
    compress : bool, optional
        Forwarded to ``savemat`` as ``do_compression``; defaults to True.
    nwbfile : optional
        Object exposing an ``acquisition`` mapping whose values have a
        ``.data`` attribute. When omitted, the notebook-level ``nwb``
        variable is used, which is what the function always did before.

    Returns
    -------
    dict
        The ``{series name: data}`` mapping that was written, so a caller
        that assigns the result receives the exported data rather than None.
    """
    # Fall back to the notebook global only when no file object is supplied
    source = nwb if nwbfile is None else nwbfile
    mdict = {k:source.acquisition[k].data for k in source.acquisition.keys()}
    sio.savemat(out_mat,mdict,do_compression=compress)
    return mdict


In [17]:
# Export the acquisition data to a MATLAB .mat file via nwb_to_mat
md = nwb_to_mat('/home/elijahc/.emu/pdil/pt_01/SEEG/processed/PO_Day_02.mat')


In [19]:
from emu.luigi.box import BoxClient,file_id_to_path
# Client object used by the later `box.upload(...)` cell
box = BoxClient()

In [20]:
# Resolve a Box file id to its path; per the saved output this id is electrode_locations.csv
file_id_to_path(633031167652)

'/EMU/STUDY_PDil/PT_01/SEEG/electrode_locations.csv'

In [22]:
# Local path of the exported .mat file
mat_path = '/home/elijahc/.emu/pdil/pt_01/SEEG/processed/PO_Day_02.mat'
# NOTE(review): nwb_path is not used by any visible cell — only the .mat file is uploaded here
nwb_path = '/home/elijahc/.emu/pdil/pt_01/SEEG/processed/PO_Day_02.nwb'
# Upload the .mat file to the given Box folder; the call's result is shown as the cell output
box.upload('/EMU/STUDY_PDil/PT_01/SEEG/processed',mat_path)

File exists, updating contents


<Box File - 633760195823 (PO_Day_02.mat)>

In [24]:
from emu.pipeline.remote import RemoteCSV
# Load the electrode locations CSV from its remote path and preview the first rows
RemoteCSV(file_path='/EMU/STUDY_PDil/PT_01/SEEG/electrode_locations.csv').load().head()

Unnamed: 0,chan_num,ele_num,anat_sh,anat_lg
0,1,1,LOF,L.Orbital.Frontal
1,2,2,LOF,L.Orbital.Frontal
2,3,3,LOF,L.Orbital.Frontal
3,4,4,LOF,L.Orbital.Frontal
4,5,5,LOF,L.Orbital.Frontal
