In [1]:
import os
import time
import datetime
from pathlib import Path
import numpy as np
import pandas as pd
import pyabf
import utils

%load_ext autoreload
%autoreload 2

In [2]:
# Load the provided Human18 metadata table and discard its 'Unnamed: 0' column.
human18_meta = pd.read_csv('./data/raw/Metadata/Human18_Metadatav2.csv')
human18_meta = human18_meta.drop(columns='Unnamed: 0')

In [3]:
human18_meta.shape

(139, 18)

In [4]:
human18_meta.head()

Unnamed: 0,ABF File,Data Type,Exp. Date,Case #,Cell #,Cell Layer,Stim Type,Threshold,Gain Value,ZD,Hold,Unnamed: 12,Gain,Offset,Response Channel,Command Channel,RMP,Stimulus type
0,18320001,Human,March_20_2018,First,C1,L23,Firing,n.a,n.a,n.a,n.a,,1.0,-20.0,Iclamp(mV),Current_in(pA),-70.0,Ramp
1,18320005,Human,March_20_2018,First,C1,L23,Firing,n.a,n.a,n.a,n.a,,1.0,-20.0,Iclamp(mV),Current_in(pA),-70.5,Long_Square
2,18320015,Human,March_20_2018,First,C2,L23,Firing,n.a,n.a,n.a,n.a,,1.0,-20.0,Iclamp(mV),Current_in(pA),-68.1,Long_Square
3,18320019,Human,March_20_2018,First,C3,L23,Firing,n.a,n.a,n.a,n.a,,1.0,-27.0,Iclamp(mV),Current_in(pA),-71.3,Ramp
4,18320021,Human,March_20_2018,First,C3,L23,Firing,n.a,n.a,n.a,n.a,,1.0,-27.0,Iclamp(mV),Current_in(pA),-71.3,Long_Square


## Identify files to be converted
- ABF files from human2018
- need metadata to determine which files are testing intrinsic properties
- extract ABF version info, time of creation and comments from the ABF file

In [5]:
# Root folder of the raw 2018 human-tissue ABF recordings.
human2018_abf_dir = Path(
    "./data/raw/recordings/Human tissue/Human tissue-White noise_2018/"
)
# utils.summary_df builds the per-file table; later cells read its `file_name`
# and `path` columns.
human2018_file_info = utils.summary_df(human2018_abf_dir)

In [6]:
%%time
def parse_timestamp(abf_path):
    """Return the date/time stored in an ABF file's header as a ``datetime``.

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.

    Returns
    -------
    datetime.datetime
        ``abf.abfDateTimeString`` parsed with the format
        ``%Y-%m-%dT%H:%M:%S.%f`` (no timezone information is attached).

    Raises
    ------
    ValueError
        If the header's date string does not match that format.
    """
    # Only a header field is read, so skip loading the sweep data.
    abf = pyabf.ABF(abf_path, loadData=False)
    return datetime.datetime.strptime(abf.abfDateTimeString, "%Y-%m-%dT%H:%M:%S.%f")

def extract_abf_version(abf_path):
    """Return the ABF format version string of a file (e.g. ``'1.8.3.0'``).

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.

    Returns
    -------
    The value of ``abf.abfVersionString`` for that file.
    """
    # The version lives in the header, so skip loading the sweep data.
    abf = pyabf.ABF(abf_path, loadData=False)
    return abf.abfVersionString

def extract_abf_comments(abf_path):
    """Return the tag comments recorded in an ABF file.

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.

    Returns
    -------
    The value of ``abf.tagComments`` (shown as lists of comment strings in
    this notebook's outputs).
    """
    # Tag comments are read from the header, so skip loading the sweep data.
    abf = pyabf.ABF(abf_path, loadData=False)
    return abf.tagComments

# Derive one column per header field by running its extractor on every ABF path.
header_extractors = {
    'timestamp': parse_timestamp,
    'abf_version': extract_abf_version,
    'abf_comments': extract_abf_comments,
}
for column_name, extractor in header_extractors.items():
    human2018_file_info[column_name] = human2018_file_info['path'].apply(extractor)

# file name without its extension: the key used to merge with the provided metadata
human2018_file_info['file_id'] = human2018_file_info['file_name'].apply(
    lambda file_name: Path(file_name).stem
)

CPU times: user 2.43 s, sys: 900 ms, total: 3.33 s
Wall time: 3.57 s


In [7]:
human2018_file_info.shape

(174, 6)

In [8]:
human2018_file_info.file_name.nunique()

162

In [9]:
human2018_file_info.head()

Unnamed: 0,file_name,path,timestamp,abf_version,abf_comments,file_id
0,18417020.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-04-17 15:04:31.906,1.8.3.0,"[Cell 3, RMP -64.7 L2/3;, Cell 3, RMP -64.7 ...",18417020
1,18417023.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-04-17 15:07:01.500,1.8.3.0,"[Cell 3, RMP -64.7 L2/3; offset -17.5 mv]",18417023
2,18417026.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-04-17 15:24:09.921,1.8.3.0,"[Cell 3, RMP -64.7 L2/3; offset -17.5 mv, Gai...",18417026
3,18417030.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-04-17 15:42:51.375,1.8.3.0,"[Cell 3, RMP -64.7 L2/3; offset -17.5 mv, Gai...",18417030
4,18417025.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-04-17 15:12:11.625,1.8.3.0,"[Cell 3, RMP -64.7 L2/3; offset -17.5 mv, Gai...",18417025


In [10]:
human2018_file_info.abf_version.value_counts()

1.8.3.0    174
Name: abf_version, dtype: int64

## Identify issues with duplicates

In [11]:
human2018_file_info[human2018_file_info.duplicated(subset=['file_id', 'timestamp'], keep=False)]

Unnamed: 0,file_name,path,timestamp,abf_version,abf_comments,file_id
81,18129016.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:08:33.000,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 50, 1-30]",18129016
82,18129015.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:01:20.750,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 25, 1-30]",18129015
83,18129016.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:08:33.000,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 50, 1-30]",18129016
84,18129015.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:01:20.750,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 25, 1-30]",18129015


In [12]:
human2018_file_info[human2018_file_info.duplicated(subset='file_id', keep=False)]

Unnamed: 0,file_name,path,timestamp,abf_version,abf_comments,file_id
81,18129016.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:08:33.000,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 50, 1-30]",18129016
82,18129015.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:01:20.750,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 25, 1-30]",18129015
83,18129016.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:08:33.000,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 50, 1-30]",18129016
84,18129015.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-01-29 16:01:20.750,1.8.3.0,"[C4, L5, RMP: -69.4 mv, Gain 20, Dc 25, 1-30]",18129015
101,18320002.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 18:12:52.812,1.8.3.0,[C1; L2/3; RMP --61.4;],18320002
103,18320001.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 18:12:52.812,1.8.3.0,[C1; L2/3; RMP --61.4;],18320001
104,18320006.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 18:22:37.390,1.8.3.0,[C1; L2/3; RMP --61.4; offset -20; Gain 20; DC...,18320006
105,18320007.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 18:26:28.390,1.8.3.0,[C1; L2/3; RMP --61.4; offset -20; Gain 20; DC...,18320007
109,18320016.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 19:35:05.781,1.8.3.0,[C3; L2/3; RMP -66.2;],18320016
110,18320019.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-20 19:36:50.875,1.8.3.0,[C3; L2/3; RMP -66.2;],18320019


## Merge metadata with extracted information about ABF files

In [13]:
# Left join: keep every metadata row and attach the ABF file info whose `file_id`
# equals the row's 'ABF File' value (rows without a match get NaN in those columns).
merged_meta = pd.merge(
    human18_meta,
    human2018_file_info,
    how='left',
    left_on='ABF File',
    right_on='file_id',
)

In [14]:
# we are left with 36 files to convert
merged_meta.abf_version.value_counts()

1.8.3.0    158
Name: abf_version, dtype: int64

In [15]:
merged_meta['ABF File'].nunique()

131

## Convert

In [16]:
# Destination folder for the converted NWB files; created together with any
# missing parent folders, and left untouched if it already exists.
output_dir = Path("data/processed/recordings/human2018/")
output_dir.mkdir(parents=True, exist_ok=True)

In [17]:
# Keep only the rows whose 'Stim Type' is 'Firing', one row per file_id.
is_firing = merged_meta['Stim Type'] == 'Firing'
to_convert = merged_meta.loc[is_firing].drop_duplicates(subset='file_id')

In [18]:
def get_stim_chan(abf_path):
    """Return the name of the second DAC channel (``dacNames[1]``) of an ABF file.

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.

    Raises
    ------
    IndexError
        If the file lists fewer than two DAC channel names.
    """
    # Channel names come from the header, so skip loading the sweep data.
    abf = pyabf.ABF(abf_path, loadData=False)
    return abf.dacNames[1]

def get_resp_chan(abf_path):
    """Return the name of the first ADC channel (``adcNames[0]``) of an ABF file.

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.

    Raises
    ------
    IndexError
        If the file lists no ADC channel names.
    """
    # Channel names come from the header, so skip loading the sweep data.
    abf = pyabf.ABF(abf_path, loadData=False)
    return abf.adcNames[0]

In [19]:
# Look up the stimulus and response channel names for every file to convert.
channel_getters = (('stim_chan', get_stim_chan), ('resp_chan', get_resp_chan))
for column_name, getter in channel_getters:
    to_convert[column_name] = to_convert['path'].apply(getter)

In [20]:
to_convert.abf_version.value_counts()

1.8.3.0    41
Name: abf_version, dtype: int64

In [21]:
from x_to_nwb import ABF1Converter_meta2

def generate_subject_meta(row):
    """Build the subject metadata dictionary passed to the NWB converter.

    ``row`` must expose ``abf_comments`` and ``file_id``. The comments are
    stringified into the subject description and the file id becomes the
    ``Donor_<file_id>`` subject id; age is left unset.

    Returns a dict with a single ``'Subject'`` entry.
    """
    subject = dict(
        age=None,
        description=f'{row.abf_comments}',
        species='Homo sapiens',
        subject_id=f'Donor_{row.file_id}',
    )
    return {'Subject': subject}


def convert_abfv1_row(row):
    """Convert the ABF v1 recording described by ``row`` into an NWB file.

    The file is written to ``output_dir / '<file_id>.nwb'`` (``output_dir`` is
    the notebook-level output folder) and a completion message is printed.

    Parameters
    ----------
    row : object with attribute access (e.g. a row of ``to_convert``)
        Must provide ``path``, ``file_id``, ``resp_chan``, ``stim_chan``,
        ``Offset``, ``Gain`` and ``abf_comments``.

    Notes
    -----
    The ABF file is opened by the converter itself; this function no longer
    loads it a second time into an unused local variable.
    """
    output_path = output_dir / f'{row.file_id}.nwb'
    meta = generate_subject_meta(row)

    converter = ABF1Converter_meta2.ABF1Converter(
        row.path,
        str(output_path),
        acquisitionChannelName=row.resp_chan,
        stimulusChannelName=row.stim_chan,
        responseOffset=row.Offset,
        metadata=meta,
        # the metadata gain is multiplied by 1000 before being handed over
        responseGain=float(row.Gain) * 1000,
    )
    converter.convert()

    print(f"NWB Conversion complete for {output_path}")

In [22]:
# Outcome bookkeeping: (file_id, path) per converted file and
# (exception, file_id, path) per failed file.
successes, errors = [], []

for row_index, row in to_convert.iterrows():
    # Pick the converter for this ABF version and the exception types that count
    # as a per-file failure (anything else propagates and stops the loop).
    # NOTE(review): convert_abfv2_row is not defined in the visible part of this
    # notebook; the lambda keeps the name lookup deferred until a v2 row shows up.
    if row.abf_version == '2.6.0.0':
        run_conversion = lambda r: convert_abfv2_row(r)
        recoverable = (ValueError, IndexError, TypeError, KeyError)
    elif row.abf_version == '1.8.3.0':
        run_conversion = lambda r: convert_abfv1_row(r)
        recoverable = (ValueError, IndexError, TypeError, KeyError, AttributeError)
    else:
        # rows with any other version are skipped without being recorded
        continue

    try:
        run_conversion(row)
        successes.append((row.file_id, row.path))
    except recoverable as e:
        print('**'*50)
        print(f'Error: {e} \n {row.path} \n')
        errors.append((e, row.file_id, row.path))

data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/Second case/C1_L23/firing/18320001.abf
****************************************************************************************************
Error: 'NoneType' object is not callable 
 data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/Second case/C1_L23/firing/18320001.abf 

data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/First case/C1_L23/Firing/18320005.abf


  warn("Date is missing timezone information. Updating to local timezone.")


Successfully converted to data/processed/recordings/human2018/18320005.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18320005.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/Second case/C2_L23/Gain 20/18320015.abf
****************************************************************************************************
Error: 'ABF' object has no attribute '_stringsIndexed' 
 data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/Second case/C2_L23/Gain 20/18320015.abf 

data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/Second case/C3_L23/Firing/18320019.abf
Successfully converted to data/processed/recordings/human2018/18320019.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18320019.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_20_2018/First case/C3_L23/Firing/18320021.abf
Successfully converted to data/processed/recordings/human2018/1832002

Successfully converted to data/processed/recordings/human2018/18329013.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18329013.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_29_2018/C3_L23/Firing/18329014.abf
Successfully converted to data/processed/recordings/human2018/18329014.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18329014.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_29_2018/C8_L23/Firing/18329031.abf
Successfully converted to data/processed/recordings/human2018/18329031.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18329031.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_29_2018/C9.1_L23/Firing/18329033.abf
Successfully converted to data/processed/recordings/human2018/18329033.nwb.
NWB Conversion complete for data/processed/recordings/human2018/18329033.nwb
data/raw/recordings/Human tissue/Human tissue-White noise_2018/March_29_20

In [23]:
!dandi validate "data/processed/recordings/human2018/newtest/"

Usage: dandi validate [OPTIONS] [PATHS]...
Try 'dandi validate --help' for help.

Error: Invalid value for '[PATHS]...': Path 'data/processed/recordings/human2018/newtest/' does not exist.


## Extract stimulus info to later merge with metadata

In [33]:
# Every NWB file currently present in the conversion output folder.
nwb_dir = Path('data/processed/recordings/human2018/')
converted_nwb_paths = [nwb_path for nwb_path in nwb_dir.glob('*.nwb')]

In [34]:
# File ids are the NWB file names without their extension.
converted_nwb_file_ids = [nwb_path.stem for nwb_path in converted_nwb_paths]

In [35]:
converted_nwb_file_ids

['18329051',
 '18329044',
 '18329050',
 '18o22020',
 '18129004',
 '18329043',
 '18320016',
 '18220002',
 '18329033',
 '18329031',
 '18320014',
 '18320000',
 '18426010',
 '18426000',
 '18320005',
 '18426017',
 '18220007',
 '18320012',
 '18426016',
 '18329013',
 '18201005',
 '18201011',
 '18220008',
 '18320021',
 '18201004',
 '18426009',
 '18201028',
 '18320019',
 '18220019',
 '18320031',
 '18320030',
 '18329014',
 '18320024',
 '18220018',
 '18201029',
 '18o22001',
 '18129009',
 '18329062',
 '18o22010']

In [36]:
# Restrict the ABF file table to the recordings that have a converted NWB file.
was_converted = human2018_file_info['file_id'].isin(converted_nwb_file_ids)
to_extract_stim_info = human2018_file_info[was_converted]

In [37]:
# parse relevant info related to stimulus, including duration, and amplitudes
def get_stim_info(abf_path, stim_channel_num = 1, stim_gain = 1, stim_name = 'sweepC',
                  amp_sample_index = 5000):
    """Summarise the stimulus of every sweep in an ABF file.

    For each sweep the (rounded, gain-scaled) stimulus trace is sampled at
    ``amp_sample_index`` to obtain the sweep's amplitude. The stimulus window is
    then taken as the samples whose value equals that amplitude.

    Parameters
    ----------
    abf_path : str or pathlib.Path
        Location of the ``.abf`` file.
    stim_channel_num : int
        Channel passed to ``abf.setSweep`` when selecting each sweep.
    stim_gain : number
        Factor applied to the stimulus trace before rounding.
    stim_name : str
        ``'sweepY'`` reads the trace from ``abf.sweepY``; any other value reads
        ``abf.sweepC``.
    amp_sample_index : int
        Sample at which the amplitude is read. Defaults to 5000, the value that
        used to be hard-coded; pass a smaller index for short sweeps.

    Returns
    -------
    dict
        ``stim_amp_vec`` (one amplitude per sweep), ``stim_duration``,
        ``stim_start_time``, ``stim_end_time``, ``num_sweeps`` and
        ``stim_sampling_rate``. Duration, start and end describe the last sweep
        processed and are ``None`` when the file has no sweeps.

    Raises
    ------
    IndexError
        If ``amp_sample_index`` lies outside a sweep's stimulus trace.

    Notes
    -----
    The duration counts every sample equal to the amplitude, contiguous or not,
    so a sweep whose amplitude equals its baseline reports the baseline samples.
    """
    abf = pyabf.ABF(abf_path)
    num_sweeps = abf.sweepCount
    # Constant for the whole file, so read it once instead of on every sweep.
    sampling_rate = abf.dataRate
    stim_amps = np.zeros(num_sweeps)
    # Pre-set so that a file without sweeps still yields a complete dictionary.
    stim_duration = None
    stim_start_time = None
    stim_end_time = None

    for sweep_index in range(num_sweeps):
        abf.setSweep(sweep_index, channel=stim_channel_num)
        raw_trace = abf.sweepY if stim_name == 'sweepY' else abf.sweepC
        stim_vec = np.round(raw_trace * stim_gain)

        n_samples = len(stim_vec)
        if not -n_samples <= amp_sample_index < n_samples:
            raise IndexError(
                f"amp_sample_index {amp_sample_index} is out of bounds for sweep "
                f"{sweep_index} with {n_samples} samples"
            )
        stim_amp = stim_vec[amp_sample_index]
        stim_amps[sweep_index] = round(stim_amp)

        matching_inds = np.where(stim_vec == stim_amp)[0]
        stim_duration = matching_inds.size / sampling_rate
        if matching_inds.size == 0:
            continue

        stim_start_time = abf.sweepX[matching_inds[0]]
        stim_end_time = abf.sweepX[matching_inds[-1]]

    return {
        'stim_amp_vec': stim_amps,
        'stim_duration': stim_duration,
        'stim_start_time': stim_start_time,
        'stim_end_time': stim_end_time,
        'num_sweeps': num_sweeps,
        'stim_sampling_rate': sampling_rate,
    }

In [38]:
to_extract_stim_info

Unnamed: 0,file_name,path,timestamp,abf_version,abf_comments,file_id
18,18o22001.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-10-22 13:24:42.218,1.8.3.0,[C1. l2/3; rmp -74.5MV; p offset: -26 mv],18o22001
23,18o22020.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-10-22 15:19:43.328,1.8.3.0,"[C3, l2/3; rmp -68.4 MV; p offset: -20mv]",18o22020
30,18o22010.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-10-22 14:29:48.687,1.8.3.0,"[C2, l2/3; rmp -71.6 MV; p offset: -28mv]",18o22010
50,18329033.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 17:20:26.031,1.8.3.0,"[C9; L2/3; RMP 60.7; Offset -19mv,]",18329033
53,18329031.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 17:12:32.531,1.8.3.0,"[C8; L2/3; RMP -59.3 Offset -19mv,]",18329031
54,18329044.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 17:45:31.703,1.8.3.0,"[C9; L2/3; RMP -69.7 mv; Offset -19mv, Gain 2...",18329044
55,18329043.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 17:45:08.109,1.8.3.0,"[C9; L2/3; RMP -69.7 mv; Offset -19mv, Gain 2...",18329043
58,18329062.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 19:43:15.281,1.8.3.0,"[C13; L2/3; RMP -61.2mv; -17 mv,]",18329062
59,18329013.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 14:28:44.953,1.8.3.0,"[C3; L2/3; RMP -61.5,, C3; L2/3; RMP -61.5, Of...",18329013
60,18329014.abf,data/raw/recordings/Human tissue/Human tissue-...,2018-03-29 14:29:17.812,1.8.3.0,"[C3; L2/3; RMP -61.5, Offset -15 mv]",18329014


In [39]:
# Collect the stimulus summary of every converted recording, keyed by ABF file name.
# Files whose summary cannot be extracted are kept with a None entry and the
# error message is printed.
stim_info = {}
for row_label, file_row in to_extract_stim_info.iterrows():
    try:
        file_stim_info = get_stim_info(file_row.path, stim_name='sweepC')
    except (AttributeError, ValueError, IndexError) as e:
        print(e)
        file_stim_info = None
    stim_info[file_row.file_name] = file_stim_info

# one row per ABF file, one column per summary field
stim_info_df = pd.DataFrame(stim_info).transpose()

index 5000 is out of bounds for axis 0 with size 3000
'ABF' object has no attribute '_stringsIndexed'
'ABF' object has no attribute '_stringsIndexed'


In [40]:
# Folder for the extracted stimulus summaries. parents=True creates the
# intermediate folders as well, so this cell also works on a fresh checkout
# where 'data/processed/meta' does not exist yet.
stim_info_dir = Path('./data/processed/meta/stiminfo/')
stim_info_dir.mkdir(parents=True, exist_ok=True)

In [41]:
# Label the index so the first CSV column is named 'abf_filename', then export.
stim_info_csv_path = stim_info_dir / 'human2018_stiminfo.csv'
stim_info_df.index.name = 'abf_filename'
stim_info_df.to_csv(stim_info_csv_path)