In [1]:
import os
import pyabf
import subprocess
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings(action='once')

In [2]:
def get_cc_parameters(abf):
    """Read the current-clamp sweep parameters from an ABF header.

    The values are parsed from the text in ``abf.headerText`` (one
    ``name = value`` entry per line).

    Parameters
    ----------
    abf : object exposing a ``headerText`` string (a ``pyabf.ABF`` in this
        notebook).

    Returns
    -------
    tuple
        ``(sweepCount, sweepStart, sweepStep, sweepPointCount)``.
        ``sweepStart`` and ``sweepStep`` are the parsed epoch level and level
        increment multiplied by ``400 / scale``, where ``scale`` is the
        ``fDACScaleFactor`` entry selected through the ADC channel name.

    Raises
    ------
    IndexError
        If one of the expected header entries is absent.
    ValueError
        If several ADC channels are listed and none of them has unit ``mV``.
    """
    header = abf.headerText.split('\n')

    def first_entry(key):
        # first header line starting with `key`; IndexError when there is none
        return [line for line in header if line.startswith(key)][0]

    def list_entry(key):
        # Parse "key = ['a', 'b']" into ['a', 'b'].  An empty list "[]" would
        # otherwise come out as [''], so blank items are dropped.
        raw = first_entry(key).split(' = ')[1]
        items = [item.strip("'") for item in raw.strip("][").split(', ')]
        return [item for item in items if item]

    # read in basic values
    sweepCount = int(first_entry('sweepCount').split()[2])
    # 4th whitespace-separated token with its trailing character removed, i.e.
    # the second list element when the line looks like "name = [a, b, ...]"
    sweepStart = int(float(first_entry('fEpochInitLevel').split()[3][:-1]))
    sweepStep = int(float(first_entry('fEpochLevelInc').split()[3][:-1]))
    sweepPointCount = int(first_entry('sweepPointCount').split()[2])

    # get multiplier
    adcname = list_entry('adcNames')
    adcunit = list_entry('adcUnits')
    scales = [float(item) for item in list_entry('fDACScaleFactor')]

    if len(adcname) == 0:
        # No ADC channel to pick a scale factor from: fall back to 400, which
        # makes the multiplier below exactly 1.
        print('Error, no multiplier')
        scale = 400.
    elif len(adcname) == 1:
        # the last word of the channel name is used as index into the scale list
        scale = scales[int(adcname[0].split()[-1])]
    else:
        # several channels: use the one whose unit is mV
        name = adcname[adcunit.index('mV')]
        scale = scales[int(name.split()[-1])]

    # NOTE(review): 400 looks like the reference DAC scale factor the levels
    # are normalised against -- confirm with the acquisition setup.
    scalefactor = 400. / scale
    sweepStart = sweepStart * scalefactor
    sweepStep = sweepStep * scalefactor

    return sweepCount, sweepStart, sweepStep, sweepPointCount

def get_vc_parameters(abf):
    """Return ``(sweepCount, sweepPointCount)`` parsed from ``abf.headerText``.

    Each non-blank header line is split on whitespace; the value is the third
    token of the first line whose first token equals the wanted name.
    """
    rows = [tokens for tokens in map(str.split, abf.headerText.split('\n')) if tokens]

    def first_int(key):
        # IndexError when no header line carries `key`
        values = [int(tokens[2]) for tokens in rows if tokens[0] == key]
        return values[0]

    n_sweeps = first_int('sweepCount')
    n_points = first_int('sweepPointCount')
    return n_sweeps, n_points

def create_cc_dataframe(fname):
    """Load a current-clamp ABF file into a DataFrame with one column per sweep.

    Returns ``(df, sweepStart, sweepStep)``.  ``df`` is indexed by
    ``abf.sweepX * 1000`` rounded to two decimals (index name 'Time (ms)') and
    its columns are named 'Trace #1', 'Trace #2', ...; the two sweep values come
    straight from ``get_cc_parameters``.
    """
    recording = pyabf.ABF(fname)
    n_sweeps, first_level, level_step, n_points = get_cc_parameters(recording)

    # Pre-size the frame from the header counts, then swap in the time axis.
    traces = pd.DataFrame(
        0.,
        index=np.arange(n_points),
        columns=np.arange(n_sweeps, dtype=int),
    )
    traces.index = np.around(recording.sweepX * 1000, 2)
    traces.index.name = 'Time (ms)'

    for sweep in traces.columns:
        recording.setSweep(sweep)
        traces[sweep] = recording.sweepY

    # user-facing trace numbering is 1-based
    traces.columns = [f'Trace #{sweep + 1}' for sweep in traces.columns]

    return traces, first_level, level_step

def create_vc_dataframe(fname):
    """Load a voltage-clamp ABF file and return the sweep-averaged trace.

    The result is a single-column DataFrame ('Trace #1') holding the mean over
    all sweeps, indexed by ``abf.sweepX * 1000`` rounded to two decimals
    (index name 'Time (ms)').
    """
    recording = pyabf.ABF(fname)
    n_sweeps, n_points = get_vc_parameters(recording)

    # Pre-size the frame from the header counts, then swap in the time axis.
    sweeps = pd.DataFrame(
        0.,
        index=np.arange(n_points),
        columns=np.arange(n_sweeps, dtype=int),
    )
    sweeps.index = np.around(recording.sweepX * 1000, 2)
    sweeps.index.name = 'Time (ms)'

    for sweep in sweeps.columns:
        recording.setSweep(sweep)
        sweeps[sweep] = recording.sweepY

    # collapse all sweeps into their point-by-point mean
    averaged = sweeps.mean(axis=1)
    return averaged.to_frame(name='Trace #1')

def save_cc_file(in_name, out_name):
    """Convert a current-clamp ABF file to a tab-separated ATF text file.

    Parameters
    ----------
    in_name : path of the ABF file to read.
    out_name : path of the ATF file to write (overwritten if present).

    Returns
    -------
    tuple
        ``(sweepStart, sweepStep)`` as returned by ``create_cc_dataframe``.
    """
    df, sweepStart, sweepStep = create_cc_dataframe(in_name)

    # Second header record: "<optional header lines>\t<data columns>".
    # The column count is the time index plus one column per sweep; it used to
    # be hard-coded to 14, which is only right for recordings with 13 sweeps.
    n_columns = df.shape[1] + 1
    header = 'ATF\t1.0\n0\t%d\n' % n_columns

    with open(out_name, 'w') as ict:
        ict.write(header)
        df.to_csv(ict, sep='\t')

    return sweepStart, sweepStep

def save_vc_file(in_name, out_name):
    """Write the sweep-averaged voltage-clamp trace of ``in_name`` to ``out_name``.

    The output starts with a two-line ATF header declaring 0 optional header
    lines and 2 columns, followed by the tab-separated table produced by
    ``create_vc_dataframe``.  Returns ``None``.
    """
    averaged = create_vc_dataframe(in_name)
    atf_header = 'ATF\t1.0\n0\t2\n'

    with open(out_name, 'w') as handle:
        handle.write(atf_header)
        averaged.to_csv(handle, sep='\t')

def make_folders(folder):
    """Ensure the current- and voltage-clamp output folders for ``folder`` exist.

    Creates ``EphysData/Cclamp/<folder>`` and ``EphysData/Vclamp/<folder>``
    relative to the working directory, including any missing parent
    directories.  Folders that already exist are left untouched.

    Raises
    ------
    OSError
        If a directory cannot be created (e.g. permissions, or a regular file
        is in the way).
    """
    paths = 'EphysData/Cclamp/%s' % folder, 'EphysData/Vclamp/%s' % folder

    for path in paths:
        # os.makedirs instead of shelling out to `mkdir`: portable, creates
        # parents, and reports failures instead of ignoring the exit status.
        os.makedirs(path, exist_ok=True)

    return

def create_cell(cellname, ccname, vcname, folder):
    """Convert one cell's CC and VC recordings from ABF to ATF.

    Input files are ``EphysData/ABF/<ccname>.abf`` and
    ``EphysData/ABF/<vcname>.abf``; outputs go to
    ``EphysData/Cclamp/<folder>/<cellname>_CC.atf`` and
    ``EphysData/Vclamp/<folder>/<cellname>_VC.atf``.

    Returns the ``(sweepStart, sweepStep)`` pair reported by ``save_cc_file``.
    """
    make_folders(folder)

    abf_template = 'EphysData/ABF/%s.abf'
    cc_source = abf_template % ccname
    vc_source = abf_template % vcname
    cc_target = 'EphysData/Cclamp/%s/%s_CC.atf' % (folder, cellname)
    vc_target = 'EphysData/Vclamp/%s/%s_VC.atf' % (folder, cellname)

    # current clamp first: it is the one that yields the sweep parameters
    sweep_parameters = save_cc_file(cc_source, cc_target)
    save_vc_file(vc_source, vc_target)

    start, step = sweep_parameters
    return start, step

def remove_missing(df):
    """Drop cells whose referenced ABF files are not on disk.

    For every row, the ``CCName`` and ``VCName`` entries are checked against
    ``EphysData/ABF/<name>.abf``.  Blank entries (empty string or missing
    value) are not checked.  Rows with at least one absent file are removed
    and a report of what was missing is printed.

    Parameters
    ----------
    df : pandas.DataFrame with ``CCName`` and ``VCName`` columns, indexed by
        cell name.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when nothing is missing, otherwise ``df`` without the
        affected rows.
    """
    abf_template = 'EphysData/ABF/%s.abf'
    missing = []
    drop = []

    for cell, row in df.iterrows():
        to_drop = False

        for name, label in ((row.CCName, 'CC File'), (row.VCName, 'VC File')):
            # Blank spreadsheet cells arrive as NaN (a float) even when the
            # sheet is read with dtype=str, so len() cannot be used on them.
            if pd.isna(name) or len(str(name)) == 0:
                continue
            if not os.path.isfile(abf_template % name):
                missing.append((cell, name, label))
                to_drop = True

        if to_drop:
            drop.append(cell)

    if len(drop) > 0:
        df = df.loc[~(df.index.isin(drop))]

        print('The following cells are missing the listed files, and have been dropped from the convesion:')
        # str(): index labels and names are not guaranteed to be strings
        report = ['\t'.join(str(part) for part in miss) for miss in missing]
        print('\n'.join(report))

    return df

def save_cell_parameters(df_out, df_read):
    """Merge converted-cell metadata into the three reference tables on disk.

    Each table is read, rows whose index already exists are overwritten
    positionally with the incoming values, new rows are appended, and the
    table is written back to the same path:

    * ``references/cell_parameters.tsv``   <- directory/ending/category columns of ``df_out``
    * ``references/recording config_cc.tsv`` <- celltype/start/step columns of ``df_out``
    * ``references/ABF_Matches.tsv``       <- ``df_read`` as given (read as strings)

    Returns ``None``.
    """
    def merge_into(path, incoming, **read_options):
        # NOTE(review): overwriting goes through `.values`, so the column order
        # of the file is assumed to match `incoming` -- confirm.
        stored = pd.read_csv(path, sep='\t', header=0, index_col=0, **read_options)
        known = incoming.index[incoming.index.isin(stored.index)]
        stored.loc[known, :] = incoming.loc[known, :].values

        unseen = incoming.loc[~(incoming.index.isin(known))]
        combined = pd.concat((stored, unseen), axis=0)
        combined.to_csv(path, sep='\t')

    merge_into(
        'references/cell_parameters.tsv',
        df_out.loc[:, ['celltype', 'cc_directory', 'cc_ending', 'vc_directory', 'vc_ending', 'category']],
    )
    merge_into(
        'references/recording config_cc.tsv',
        df_out.loc[:, ['celltype', 'start (pA)', 'step (pA)']],
    )
    merge_into('references/ABF_Matches.tsv', df_read, dtype=str)
    
def convert_files(reference_file):
    """Convert every cell listed in a reference sheet from ABF to ATF.

    The sheet is read from ``References/ConversionFiles/<reference_file>``
    (Excel for ``.xls``/``.xlsx``, comma-separated for ``.csv``, otherwise
    tab-separated) and must contain the columns ``CellType``, ``Project``,
    ``CCName`` and ``VCName``, with the cell name in the first column.

    Cells whose ABF files are absent are dropped by ``remove_missing``.  Each
    remaining cell is converted with ``create_cell``; cells that fail are
    reported together with the error and left out of the saved metadata.  The
    metadata of the successfully converted cells is then merged into the
    reference tables by ``save_cell_parameters``.

    Returns ``None``.
    """
    fname = f'References/ConversionFiles/{reference_file}'
    if fname.endswith(('.xls', '.xlsx')):
        df = pd.read_excel(fname, header=0, index_col=0, dtype=str)
    elif fname.endswith('.csv'):
        df = pd.read_csv(fname, sep=',', header=0, index_col=0, dtype=str)
    else:
        df = pd.read_csv(fname, sep='\t', header=0, index_col=0, dtype=str)

    df = df[['CellType', 'Project', 'CCName', 'VCName']]
    df = remove_missing(df)
    if len(df) == 0:
        return

    df_out = df.copy()
    # float placeholders: the values written below are floats, and assigning
    # them into integer columns triggers pandas dtype-upcast warnings/errors
    df_out['start (pA)'] = 0.
    df_out['step (pA)'] = 0.
    df_out['cc_ending'] = '_CC.atf'
    df_out['vc_ending'] = '_VC.atf'
    df_out['cc_directory'] = 'Cclamp/' + df.Project
    df_out['vc_directory'] = 'Vclamp/' + df.Project
    df_out['category'] = df_out.Project
    df_out['celltype'] = df_out.CellType

    problems = []  # (cell, exception) for every cell that could not be converted
    for cell, row in df.iterrows():
        try:
            sweepStart, sweepStep = create_cell(cell, row.CCName, row.VCName, row.Project)
            df_out.loc[cell, 'start (pA)'] = sweepStart
            df_out.loc[cell, 'step (pA)'] = sweepStep
        except Exception as err:
            # Best effort: keep converting the other cells, but remember why
            # this one failed.  (Exception, not a bare except, so that Ctrl-C
            # still interrupts a long conversion.)
            problems.append((cell, err))

    failed = [cell for cell, _ in problems]
    df_out = df_out.loc[~df_out.index.isin(failed), :]
    save_cell_parameters(df_out, df)
    if len(problems) > 0:
        report = ['%s\t%s: %s' % (cell, type(err).__name__, err) for cell, err in problems]
        print('These cells had problems:\n%s' % '\n'.join(report))

    return

# NOTE(review): `create_cell` is already defined earlier in this same cell with
# the same signature; this later definition is the one bound after the cell
# runs.  One of the two should be removed.
def create_cell(cellname, ccname, vcname, folder):
    """Convert one cell's CC and VC recordings from ABF to ATF.

    Reads ``EphysData/ABF/<ccname>.abf`` and ``EphysData/ABF/<vcname>.abf`` and
    writes ``EphysData/Cclamp/<folder>/<cellname>_CC.atf`` and
    ``EphysData/Vclamp/<folder>/<cellname>_VC.atf``.

    Returns the ``(sweepStart, sweepStep)`` pair reported by ``save_cc_file``.
    """
    # make sure both output folders exist before writing into them
    make_folders(folder)
    
    cc_out = 'EphysData/Cclamp/%s/%s_CC.atf' % (folder, cellname)
    cc_in = 'EphysData/ABF/%s.abf' % ccname
    vc_out = 'EphysData/Vclamp/%s/%s_VC.atf' % (folder, cellname)
    vc_in = 'EphysData/ABF/%s.abf' % vcname
    
    sweepStart, sweepStep = save_cc_file(cc_in, cc_out)
    save_vc_file(vc_in, vc_out)
    
    return sweepStart, sweepStep

def fix_parameters(ephys_dir='/media/foldy_lab/Storage_Analysis/Electrophysiology'):
    """Regenerate the ATF files and reference tables for every known cell.

    Cells are taken from ``references/ABF_Matches.tsv``.  For each cell whose
    CC ABF file exists under ``<ephys_dir>/ABF``, the CC file (and the VC file
    when present) is converted again and the sweep start/step values are
    refreshed; the result is merged into the reference tables through
    ``save_cell_parameters``.  Cells without a CC file and cells whose
    conversion raised are listed at the end and left out of the saved tables.

    Parameters
    ----------
    ephys_dir : str, optional
        Root folder containing the ``ABF`` input folder and the ``Cclamp`` /
        ``Vclamp`` output folders.  Defaults to the location that used to be
        hard-coded here.

    Returns ``None``.
    """
    # dtype=str keeps file names such as '0123' intact and matches how
    # save_cell_parameters reads this same table.
    df_read = pd.read_csv('references/ABF_Matches.tsv', sep='\t', header=0, index_col=0, dtype=str)

    # Previously indexed an undefined `df` here (and for the directory columns
    # below), which only worked when a stale global happened to exist.
    df_read = df_read[['CellType', 'Project', 'CCName', 'VCName']]

    df_out = df_read.copy()
    # float placeholders, because float values are assigned below
    df_out['start (pA)'] = 0.
    df_out['step (pA)'] = 0.
    df_out['cc_ending'] = '_CC.atf'
    df_out['vc_ending'] = '_VC.atf'
    df_out['cc_directory'] = 'Cclamp/' + df_read.Project
    df_out['vc_directory'] = 'Vclamp/' + df_read.Project
    df_out['category'] = df_out.Project
    df_out['celltype'] = df_out.CellType

    abf_name = '%s/ABF/%%s.abf' % ephys_dir

    # keep only the cells that have at least one of their two ABF files
    present_cc = [os.path.isfile(abf_name % CCName) for CCName in df_out.CCName]
    present_vc = [os.path.isfile(abf_name % VCName) for VCName in df_out.VCName]
    present = np.logical_or(present_cc, present_vc)
    df_out = df_out[present].copy()

    missing = []   # cells without a CC file
    problems = []  # (cell, exception) for cells whose conversion failed

    for cell, row in df_out.iterrows():
        cc_in = abf_name % row.CCName
        vc_in = abf_name % row.VCName
        cc_out = '%s/%s/%s%s' % (ephys_dir, row.cc_directory, cell, row.cc_ending)
        vc_out = '%s/%s/%s%s' % (ephys_dir, row.vc_directory, cell, row.vc_ending)

        if not os.path.isfile(cc_in):
            missing.append(cell)
            continue

        try:
            sweepStart, sweepStep = save_cc_file(cc_in, cc_out)
            df_out.loc[cell, 'start (pA)'] = sweepStart
            df_out.loc[cell, 'step (pA)'] = sweepStep
            # The VC conversion is guarded as well, so one bad file cannot
            # abort the whole run before the tables are saved.
            if os.path.isfile(vc_in):
                save_vc_file(vc_in, vc_out)
        except Exception as err:
            # Exception rather than a bare except: Ctrl-C must still stop a
            # run that can take a long time.
            problems.append((cell, err))

    failed = [cell for cell, _ in problems]
    df_out = df_out[~df_out.index.isin(failed)]
    df_out = df_out[~df_out.index.isin(missing)]
    df_read = df_read.loc[df_out.index]
    save_cell_parameters(df_out, df_read)

    print('Missing:%s' % '\n'.join(str(cell) for cell in missing))
    print('Problems:%s' % '\n'.join(
        '%s\t%s: %s' % (cell, type(err).__name__, err) for cell, err in problems
    ))

    return

In [3]:
%%time

# Name of the conversion sheet; convert_files() looks for it under
# References/ConversionFiles/.
reference_file = 'Id2_CA1_titter_Natalia.xlsx'

# Converts every cell of the sheet whose ABF files are present and prints the
# cells that were dropped or failed.
convert_files(reference_file)

The following cells are missing the listed files, and have been dropped from the convesion:
Id2_1	21802001	CC File
Id2_1	21802000	VC File
Id2_2	21802004	CC File
Id2_2	21802003	VC File
Id2_3	21802007	CC File
Id2_3	21802006	VC File
Id2_4	21802010	CC File
Id2_4	21802009	VC File
Id2_5	21802013	CC File
Id2_5	21802012	VC File
Id2_6	21802016	CC File
Id2_6	21802015	VC File
Id2_7	21802018	CC File
Id2_7	21802017	VC File
Id2_8	21802021	CC File
Id2_8	21802020	VC File
Id2_9	21802024	CC File
Id2_9	21802023	VC File
Id2_10	21802027	CC File
Id2_10	21802026	VC File
Id2_11	21802030	CC File
Id2_11	21802029	VC File
Id2_12	21802034	CC File
Id2_12	21802032	VC File
Id2_13	21802038	CC File
Id2_13	21802037	VC File
Id2_14	21802041	CC File
Id2_14	21802040	VC File
Id2_15	21802044	CC File
Id2_15	21802043	VC File
Id2_16	21802047	CC File
Id2_16	21802046	VC File
Id2_17	21802050	CC File
Id2_17	21802049	VC File
Id2_18	21802053	CC File
Id2_18	21802052	VC File
Id2_19	21802056	CC File
Id2_19	21802055	VC File
Id2_20	2180205

  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():


In [4]:
%%time

"""
run this notebook to regenerate all ATF files from ABFs. Takes a long time, do NOT run this
unless necessary
"""

# The call is kept commented out so that running all cells does not start the
# full regeneration; uncomment it deliberately when it is needed.
#fix_parameters()
pass

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.25 µs


In [5]:
    # Load one reference sheet by hand (same column selection and missing-file
    # filtering as convert_files) so that single cells can be inspected below.
    # NOTE(review): this reads from 'references/', whereas convert_files reads
    # from 'References/ConversionFiles/' -- confirm which folder is intended.
    fname = 'Sina_list_exp2.xlsx'
    if fname.endswith('.xls') or fname.endswith('.xlsx'):
        df = pd.read_excel('references/%s' % fname, header=0, index_col=0, dtype=str)
    else:
        df = pd.read_csv('references/%s' % fname, sep='\t', header=0, index_col=0, dtype=str)
    
    df = df[['CellType', 'Project', 'CCName', 'VCName']]
    #df = df.loc[['ME76', 'ME84', 'ME99', 'ME112']]
    # rebinds the global `df` to the rows whose ABF files exist; later cells read it
    df = remove_missing(df)

The following cells are missing the listed files, and have been dropped from the convesion:
Red4w_01	21610001	CC File
Red4w_01	21610000	VC File
Red4w_02	21610005	CC File
Red4w_02	21610004	VC File
Red4w_03	21610008	CC File
Red4w_03	21610007	VC File
Red4w_04	21610011	CC File
Red4w_04	21610010	VC File
Red4w_05	21610015	CC File
Red4w_05	21610013	VC File
Red4w_06	21610018	CC File
Red4w_06	21610017	VC File
Red4w_07	21610022	CC File
Red4w_07	21610021	VC File
Red4w_08	21610027	CC File
Red4w_08	21610026	VC File
Red4w_09	21610030	CC File
Red4w_09	21610029	VC File
Red4w_10	21611019	CC File
Red4w_10	21611018	VC File
Red4w_11	21611022	CC File
Red4w_11	21611021	VC File
Red4w_12	21611025	CC File
Red4w_12	21611027	VC File
Red4w_13	21611029	CC File
Red4w_13	21611028	VC File
Red4w_14	21611032	CC File
Red4w_14	21611031	VC File
Red4w_15	21611035	CC File
Red4w_15	21611034	VC File
Red4w_16	21611038	CC File
Red4w_16	21611037	VC File
Red4w_17	21611041	CC File
Red4w_17	21611040	VC File
Red4w_18	21611043	CC Fil

In [6]:
def get_files(df, cell):
    """Return the ``(cc_in, vc_in)`` ABF paths recorded for ``cell`` in ``df``.

    The names are taken from the ``CCName`` and ``VCName`` columns of the row
    labelled ``cell`` and expanded to ``EphysData/ABF/<name>.abf``.
    """
    recorded_names = df.loc[cell, ['CCName', 'VCName']]
    cc_in, vc_in = ['EphysData/ABF/%s.abf' % name for name in recorded_names]

    return cc_in, vc_in

In [7]:
# Look up the ABF paths recorded for cell 'ME84' in the global `df` and build
# its sweep-averaged voltage-clamp trace.
# NOTE(review): the saved output of this cell is "KeyError: 'ME84'", i.e. the
# `df` loaded above had no row labelled 'ME84' when it ran -- pick a cell that
# is present in `df` before re-running.
cc_in, vc_in = get_files(df, 'ME84')
df_vc = create_vc_dataframe(vc_in)

KeyError: 'ME84'

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the first (only) column of the averaged voltage-clamp trace against its
# time index.  The axis labels are taken from the frame itself so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.plot(df_vc.index, df_vc.iloc[:, 0])
ax.set(xlabel=df_vc.index.name, ylabel=df_vc.columns[0])
plt.show()