In [1]:
# Restore the `doc_behavior` and `doc_ephys` variables previously saved with IPython's
# %store magic (presumably by another notebook — this cell fails if they were never stored).
%store -r doc_behavior 
%store -r doc_ephys

In [2]:
import os

In [3]:
# Root folder of the local data; session folders are resolved below as
# <data_directory>/<topdir>/<session>.
data_directory = './data/hc-3/'

In [4]:
# Keep only the behavior rows whose session folder actually exists on disk ...
session_dir_exists = [
    os.path.isdir(os.path.join(data_directory, topdir, session_name))
    for topdir, session_name in doc_behavior[['topdir', 'session']].values
]
doc_behavior = doc_behavior[session_dir_exists]

# ... and only the ephys rows whose `topdir` still appears in the filtered behavior table.
topdir_is_available = doc_ephys['topdir'].isin(doc_behavior['topdir'])
doc_ephys = doc_ephys[topdir_is_available]

In [5]:
from xml.dom import minidom as dom
import plotly.express as px
import pandas as pd
import numpy as np
import re

In [6]:
# Positional row (in the filtered `doc_behavior` table) of the session to load.
s = 2

# Work on a copy so the entries added below do not write back into `doc_behavior`.
session = doc_behavior.iloc[s].copy()

session['directory'] = os.path.join(data_directory, session['topdir'], session['session'])

# Directory entries that belong to this session (file name starts with the session name).
session_files = sorted(os.listdir(session['directory']))
session_files = [name for name in session_files if name.startswith(session['session'])]

session_files_size = [
    os.path.getsize(os.path.join(session['directory'], name)) for name in session_files
]

# Extension = first lowercase alphanumeric token after "<session>" plus one separator
# character. Names without such a token get '' instead of raising IndexError.
prefix_length = len(session['session']) + 1
extension_matches = [re.search(r'[a-z0-9]+', name[prefix_length:]) for name in session_files]
session_files_extension = [found.group(0) if found else '' for found in extension_matches]

# Shank number = digits after the final dot (e.g. "<session>.spk.3" -> 3), NaN otherwise.
# The dot is escaped: unescaped it matches any character, so a name merely ending in
# letter+digits would be mistaken for a per-shank file.
shank_matches = [re.search(r'\.([0-9]+)$', name) for name in session_files]
session_files_shank = [int(found.group(1)) if found else np.nan for found in shank_matches]

# One row per file, ordered by shank (rows without a shank number sort last).
session['files'] = pd.DataFrame({
    'name' : session_files,
    'path' : [os.path.join(session['directory'], name) for name in session_files],
    'size' : session_files_size,
    'shank' : session_files_shank,
    'extension' : session_files_extension,
}).sort_values(by='shank')

# Parse the session's .xml parameter file into `xml_info`:
#   * scalar settings from the generalInfo / acquisitionSystem / fieldPotentials sections,
#   * xml_info['channels']: one entry per channel listed in anatomicalDescription,
#   * xml_info['spike']: one entry per group listed in spikeDetection.
xml_filepath = os.path.join(session['directory'], session['session'] + '.xml')

session_xml = dom.parse(xml_filepath)


def _element_children(node):
    """Return the children of `node` that are XML elements.

    Filtering on the node type (instead of stride-slicing `childNodes`) skips the
    whitespace text nodes and comments between elements, so the result does not
    depend on how the file happens to be indented.
    """
    return [child for child in node.childNodes if child.nodeType == child.ELEMENT_NODE]


def _leaf_values(section, convert=None):
    """Map each non-empty child element of `section` to its first child's node value.

    `convert` (e.g. `int`) is applied to every value when given; elements without
    any child node are skipped.
    """
    values = {}
    for item in _element_children(section):
        if item.childNodes:
            raw_value = item.childNodes[0].nodeValue
            values[item.nodeName] = raw_value if convert is None else convert(raw_value)
    return values


# Top-level sections of <parameters>, keyed by tag name.
params = {
    section.nodeName: section
    for section in _element_children(session_xml.getElementsByTagName('parameters')[0])
}

xml_info = {}
xml_info.update(_leaf_values(params['generalInfo']))
xml_info.update(_leaf_values(params['acquisitionSystem'], int))
xml_info.update(_leaf_values(params['fieldPotentials'], int))

# Channel layout: the first element under anatomicalDescription holds one group per
# shank; shanks are numbered from 1 in document order. Dedicated loop names are used
# so the module-level session index `s` is not overwritten.
xml_info['channels'] = {}
anatomical_groups = _element_children(_element_children(params['anatomicalDescription'])[0])
for shank_number, shank_node in enumerate(anatomical_groups, start=1):
    for contact_index, channel_node in enumerate(_element_children(shank_node)):
        channel_id = int(channel_node.childNodes[0].nodeValue)
        xml_info['channels'][channel_id] = {
            'shank' : shank_number,
            'contact' : contact_index,
            # A missing `skip` attribute is treated as 0 (not skipped).
            'skip' : bool(int(channel_node.getAttribute('skip') or 0)),
            'area' : session['elepos'].get(shank_number),
        }

# Spike-detection groups, numbered from 1 in document order. The `channels` entry is a
# list of channel ids; every other entry is a single integer. The list is recognised
# by its tag name rather than by being the first entry.
xml_info['spike'] = {}
spike_groups = _element_children(_element_children(params['spikeDetection'])[0])
for group_number, group_node in enumerate(spike_groups, start=1):
    group_info = {}
    for entry in _element_children(group_node):
        if entry.nodeName == 'channels':
            group_info[entry.nodeName] = [
                int(channel_node.childNodes[0].nodeValue)
                for channel_node in _element_children(entry)
            ]
        else:
            group_info[entry.nodeName] = int(entry.childNodes[0].nodeValue)
    # NOTE(review): assumes the n-th spike group belongs to shank n — confirm.
    group_info['shank'] = group_number
    group_info['nChannels'] = len(group_info['channels'])
    xml_info['spike'][group_number] = group_info


# Copy every parsed XML setting into the session record under the same key.
for xml_key in xml_info:
    session[xml_key] = xml_info[xml_key]

# The per-shank loader passes this value to range(), so make sure it is a plain int.
session['nshanks'] = int(session['nshanks'])


In [7]:
# NOTE(review): every other statement in view reads and writes session['spike'] (singular);
# this 'spikes' entry stays an empty dict — possibly a typo, confirm before removing.
session['spikes'] = {}

# Store the session's 'samplingRate' value alongside the per-shank spike entries.
session['spike']['srate'] = session['samplingRate']
# Load, for every shank 1..nshanks, the spike waveforms (.spk), features (.fet),
# cluster assignments (.clu) and the matching rows of `doc_ephys` into
# session['spike'][shank].
# NOTE(review): assumes session['spike'] has an entry for every shank number and that
# each shank has .spk/.fet/.clu files; otherwise this raises KeyError / IndexError.

# Rows of `doc_ephys` for this recording; identical for every shank, so computed once.
id_topdir = doc_ephys['topdir'] == session['topdir']

for shank in range(1,session['nshanks']+1):
    shank_files = session['files'].query('shank == %d' % shank)
    shank_info = session['spike'][shank]  # same dict object, mutated in place

    # Waveforms: the file is mapped as int16 (2 bytes per value) with shape
    # (nSpikes, nSamples, nChannels), so the spike count follows from the file size.
    spk_file = shank_files.query('extension == "spk"').iloc[0]

    nsamples  = shank_info['nSamples']
    nchannels = shank_info['nChannels']
    nspikes   = spk_file['size'] // 2 // nsamples // nchannels

    shank_info['file'] = spk_file
    shank_info['nSpikes'] = nspikes
    shank_info['waveform'] = np.memmap(spk_file['path'],dtype='int16',mode='r',shape=(nspikes,nsamples,nchannels))

    # Features: whitespace-separated integer table after a one-line header.
    # np.loadtxt tolerates repeated spaces and blank lines, unlike a manual split(' ').
    fet_file = shank_files.query('extension == "fet"').iloc[0]
    fet_content = np.loadtxt(fet_file['path'], dtype=int, skiprows=1, ndmin=2)

    # The last two columns are dropped — presumably they are not features; TODO confirm.
    shank_info['features'] = np.ascontiguousarray(fet_content[:, :-2])

    # Clusters: one integer per line; the first line is the cluster count and the
    # remaining lines are the per-spike cluster ids.
    clu_file = shank_files.query('extension == "clu"').iloc[0]
    clu_content = np.loadtxt(clu_file['path'], dtype=int, ndmin=1)
    nClusters = clu_content[0]
    clu_content = clu_content[1:]

    shank_info['nClusters'] = nClusters
    shank_info['cluster'] = clu_content
    shank_info['clusters'] = np.unique(clu_content)

    # Metadata rows for this recording, this electrode and the cluster ids found above.
    id_cluster   = doc_ephys['clu'].isin(shank_info['clusters'])
    id_electrode = doc_ephys['ele'] == shank

    shank_info['meta'] = doc_ephys[id_topdir & id_cluster & id_electrode]

In [8]:
# Locate the session's .eeg file (first match in the file inventory).
eeg_file = session['files'].query('extension == "eeg"').iloc[0]

# Fill the 'eeg' entry step by step through an alias of the stored dict.
session['eeg'] = {}
eeg = session['eeg']

eeg['file'] = eeg_file
# The file is mapped as int16 (2 bytes per value) with one column per channel,
# so the number of rows follows from the file size.
eeg['nSamples'] = eeg['file']['size'] // 2 // session['nChannels']
eeg['data'] = np.memmap(
    eeg['file']['path'],
    dtype='int16',
    mode='r',
    shape=(eeg['nSamples'], session['nChannels']),
)
eeg['srate'] = session['lfpSamplingRate']
# Time axis: sample index divided by the sampling rate.
eeg['time'] = np.arange(0, eeg['nSamples']) / eeg['srate']

In [9]:
# Locate the session's .whl file (first match in the file inventory).
whl_file = session['files'].query('extension == "whl"').iloc[0]

# Fill the 'tracking' entry through an alias of the stored dict.
session['tracking'] = {}
tracking = session['tracking']

tracking['file'] = whl_file
# Hard-coded rate, not read from any file — presumably in Hz; TODO confirm the source.
tracking['srate'] = 39.0625
# Tab-separated table without a header row; every -1 entry is replaced by NaN.
whl_table = pd.read_csv(whl_file['path'], sep='\t', header=None)
tracking['data'] = whl_table.replace(-1, np.nan)


In [10]:
# Persist the assembled `session` record with IPython's %store magic so that it can be
# restored elsewhere with `%store -r session`.
%store session

Stored 'session' (Series)
