In [1]:
import os
import pyabf
import subprocess
import numpy as np
import pandas as pd
import warnings
import statsmodels.api as sm
# Short alias for the statsmodels LOWESS smoother (used by create_vc_dataframe).
lowess = sm.nonparametric.lowess
# Report each distinct warning only the first time it is raised.
warnings.filterwarnings(action='once')

# Root directory; the functions below build ABF/, Cclamp/, Vclamp/ and Aclamp/ paths under it.
home_dir = '/media/foldy_lab/Storage_Analysis/Electrophysiology'

In [2]:
def get_ac_parameters(abf):
    """Extract sweep layout and the voltage channel from an ABF header.

    Only ``abf.headerText`` is read.  It is split into lines and searched for
    the keys ``sweepCount``, ``sweepPointCount`` and ``adcUnits``.

    Returns
    -------
    tuple
        ``(sweepCount, sweepPointCount, channel)`` where ``channel`` is the
        position of ``'mV'`` within the ``adcUnits`` list.

    Raises
    ------
    IndexError
        If one of the header keys is missing.
    ValueError
        If a count is not an integer or no ADC unit equals ``'mV'``.
    """
    header_lines = abf.headerText.split('\n')

    def header_int(key):
        # The value is the third whitespace-separated token of "<key> = <value>".
        found = [int(entry.split()[2]) for entry in header_lines if entry.startswith(key)]
        return found[0]

    n_sweeps = header_int('sweepCount')
    n_points = header_int('sweepPointCount')

    # "adcUnits = ['pA', 'mV']" -> ['pA', 'mV']
    units_text = [entry.split(' = ')[1] for entry in header_lines if entry.startswith('adcUnits')][0]
    units = [token.strip("'") for token in units_text.strip("][").split(', ')]

    return n_sweeps, n_points, units.index('mV')

def get_cc_parameters(abf):
    """Read the current-clamp sweep layout and step protocol from an ABF header.

    Only ``abf.headerText`` is used.  Each value is the text after ``" = "``
    on the first header line that starts with the requested key.

    Returns
    -------
    tuple
        ``(sweepCount, sweepStart, sweepStep, sweepPointCount, channel)``.
        ``channel`` is the position of ``'mV'`` in ``adcUnits``.
        ``sweepStart`` / ``sweepStep`` are the ``fEpochInitLevel`` /
        ``fEpochLevelInc`` entries at index ``2 * channel + 1``, multiplied by
        ``400 / scale``, where ``scale`` is the ``fDACScaleFactor`` entry
        selected by the trailing number of the ADC name.

    Raises
    ------
    IndexError
        If a required header key is missing or a list is too short.
    ValueError
        If no ADC unit equals ``'mV'`` or a numeric field cannot be parsed.
    """
    header = abf.headerText.split('\n')

    def header_text(key):
        # Text to the right of " = " on the first line starting with `key`.
        return [line.split(' = ')[1] for line in header if line.startswith(key)][0]

    def header_items(key):
        # "[a, b]" -> ['a', 'b'].  An empty list "[]" must yield [], not [''],
        # otherwise the "no multiplier" fallback below can never trigger.
        return [item for item in header_text(key).strip("][").split(', ') if item != '']

    # get multiplier
    adcname = [item.strip("'") for item in header_items('adcNames')]
    adcunit = [item.strip("'") for item in header_items('adcUnits')]
    scale = [float(item) for item in header_items('fDACScaleFactor')]
    channel = adcunit.index('mV')

    # read in basic values
    sweepCount = [int(line.split()[2]) for line in header if line.startswith('sweepCount')][0]
    sweepStart = [float(item) for item in header_items('fEpochInitLevel')][2 * channel + 1]
    sweepStep = [float(item) for item in header_items('fEpochLevelInc')][2 * channel + 1]
    sweepPointCount = [int(line.split()[2]) for line in header if line.startswith('sweepPointCount')][0]

    # The branches are mutually exclusive: the fallback must not fall through
    # into the list-indexing branches (scale would already be a float there).
    if len(adcname) == 0:
        print('Error, no multiplier')
        scale = 400.
    elif len(adcname) == 1:
        # e.g. 'IN 1' -> entry 1 of fDACScaleFactor
        scale = scale[int(adcname[0].split()[-1])]
    else:
        scale = scale[int(adcname[channel].split()[-1])]

    # NOTE(review): 400 is presumably the reference DAC scale factor the levels
    # are expressed against -- confirm with the acquisition setup.
    scalefactor = 400. / scale
    sweepStart = sweepStart * scalefactor
    sweepStep = sweepStep * scalefactor

    return sweepCount, sweepStart, sweepStep, sweepPointCount, channel

def get_vc_parameters(abf):
    """Extract sweep layout and the list of ADC units from an ABF header.

    Only ``abf.headerText`` is read.

    Returns
    -------
    tuple
        ``(sweepCount, sweepPointCount, adcunit)`` where ``adcunit`` is the
        list of unit strings parsed from the ``adcUnits`` header line.

    Raises
    ------
    IndexError
        If one of the header keys is missing.
    ValueError
        If a count is not an integer.
    """
    raw_lines = abf.headerText.split('\n')

    # "adcUnits = ['pA', 'mV']" -> ['pA', 'mV']
    units_text = [entry.split(' = ')[1] for entry in raw_lines if entry.startswith('adcUnits')][0]
    adc_units = [token.strip("'") for token in units_text.strip("][").split(', ')]

    # Tokenise every non-blank line; the key is token 0 and the value token 2.
    tokenised = [tokens for tokens in map(str.split, raw_lines) if tokens != []]

    def first_int(key):
        return [int(tokens[2]) for tokens in tokenised if tokens[0] == key][0]

    n_sweeps = first_int('sweepCount')
    n_points = first_int('sweepPointCount')

    return n_sweeps, n_points, adc_units

def create_ac_dataframe(fname):
    """Load sweep 0 of the voltage (``'mV'``) channel of an ABF file.

    Parameters
    ----------
    fname : path passed to ``pyabf.ABF``.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'Trace #0'`` holding ``sweepY``.  The index is
        ``sweepX * 1000`` rounded to two decimals and is named ``'Time (ms)'``.
    """
    recording = pyabf.ABF(fname)
    _, n_points, voltage_channel = get_ac_parameters(recording)

    # Pre-sized frame: one row per sample, one integer-labelled column.
    trace = pd.DataFrame(0., index=np.arange(n_points), columns=[0])
    trace.index = pd.Index(np.around(recording.sweepX * 1000, 2), name='Time (ms)')

    recording.setSweep(sweepNumber=0, channel=voltage_channel)
    trace[0] = recording.sweepY

    trace.columns = ['Trace #0']
    return trace

def create_cc_dataframe(fname):
    """Load every sweep of the voltage (``'mV'``) channel of an ABF file.

    Parameters
    ----------
    fname : path passed to ``pyabf.ABF``.

    Returns
    -------
    tuple
        ``(df, sweepStart, sweepStep)``.  ``df`` has one column per sweep,
        named ``'Trace #1'``, ``'Trace #2'``, ...; its index is
        ``sweepX * 1000`` rounded to two decimals and is named ``'Time (ms)'``.
        ``sweepStart`` and ``sweepStep`` are passed through unchanged from
        ``get_cc_parameters``.
    """
    recording = pyabf.ABF(fname)
    n_sweeps, start_level, step_level, n_points, voltage_channel = get_cc_parameters(recording)

    # Pre-sized frame: one row per sample, one integer-labelled column per sweep.
    sweeps = pd.DataFrame(0., index=np.arange(n_points), columns=np.arange(n_sweeps, dtype=int))
    sweeps.index = pd.Index(np.around(recording.sweepX * 1000, 2), name='Time (ms)')

    for sweep_number in sweeps.columns:
        recording.setSweep(sweepNumber=sweep_number, channel=voltage_channel)
        sweeps[sweep_number] = recording.sweepY

    # Sweep numbers are 0-based; trace labels are 1-based.
    sweeps.columns = ['Trace #%d' % (sweep_number + 1) for sweep_number in sweeps.columns]

    return sweeps, start_level, step_level

def create_vc_dataframe(fname):
    """Build the sweep-averaged trace of the most active ``'pA'`` channel.

    For every ADC channel the sweeps are averaged sample by sample.  Each
    averaged trace is then LOWESS-smoothed and scored twice:

    * ``Diff`` -- ``max - min`` of the smoothed trace;
    * ``Area`` -- sum of squared deviations of the smoothed trace from its
      mean, multiplied by the mean spacing of the time index.

    Among the channels whose unit is ``'pA'``, the one with the largest score
    is returned.  If the two scores disagree, the ``Area`` winner is used and
    the result is reported as uncertain.

    Parameters
    ----------
    fname : path passed to ``pyabf.ABF``.

    Returns
    -------
    tuple
        ``(df, certain)``.  ``df`` has one column ``'Trace #1'`` and an index
        named ``'Time (ms)'``; ``certain`` is ``True`` when both scores select
        the same channel.

    Raises
    ------
    ValueError
        If the file has no channel whose unit is ``'pA'``.
    """
    abf = pyabf.ABF(fname)
    sweepCount, sweepPointCount, adcunit = get_vc_parameters(abf)

    dfs = []

    for channel in range(len(adcunit)):
        df = pd.DataFrame(0., index=np.arange(sweepPointCount), columns=np.arange(sweepCount, dtype=int))
        df.index = np.around(abf.sweepX * 1000, 2)
        df.index.name = 'Time (ms)'

        for col in df.columns:
            abf.setSweep(sweepNumber=col, channel=channel)
            df[col] = abf.sweepY

        # Collapse all sweeps of this channel into one averaged trace.
        data = df.mean(axis=1)
        dfs.append(pd.DataFrame(data.values, index=data.index, columns=['Trace #1']))

    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    df_params = pd.DataFrame(np.nan, index=np.arange(len(dfs)), columns=['Diff', 'Area', 'ADC'])
    df_params['ADC'] = adcunit

    params = {'frac': .040, 'delta': 0.2, 'is_sorted': True, 'return_sorted': False}
    for row, df in enumerate(dfs):
        xvals = df.index
        yvals = df.iloc[:, 0]
        fit = lowess(yvals, xvals, **params)
        df_params.loc[row, 'Area'] = np.square(fit - np.mean(fit)).sum() * np.diff(xvals).mean()
        df_params.loc[row, 'Diff'] = fit.max() - fit.min()

    candidates = df_params.loc[df_params['ADC'] == 'pA', ['Diff', 'Area']]
    if len(candidates) == 0:
        # Fail with a readable message instead of an opaque idxmax error.
        raise ValueError('No pA channel found in %s' % fname)

    # Row labels equal channel numbers: [winner by Diff, winner by Area].
    index = candidates.idxmax(axis=0).values

    if np.unique(index).size == 1:
        channel = index[0]
        certain = True
    else:
        channel = index[1]
        certain = False

    return dfs[channel], certain

def save_ac_file(in_name, out_name):
    """Write the trace from ``create_ac_dataframe(in_name)`` to ``out_name``.

    The output is a fixed two-line ``ATF`` header followed by the frame as
    tab-separated text.  Returns ``None``.

    NOTE(review): the second header line ends in a hard-coded 8, which does
    not depend on the width of the frame being written -- confirm that the
    consumer of these files does not rely on that number.
    """
    trace = create_ac_dataframe(in_name)

    with open(out_name, 'w') as handle:
        handle.write('ATF\t1.0\n0\t8\n')
        trace.to_csv(handle, sep='\t')

def save_cc_file(in_name, out_name):
    """Write the sweeps from ``create_cc_dataframe(in_name)`` to ``out_name``.

    The output is a fixed two-line ``ATF`` header followed by the frame as
    tab-separated text.

    Returns
    -------
    tuple
        ``(sweepStart, sweepStep)`` exactly as returned by
        ``create_cc_dataframe``.

    NOTE(review): the second header line ends in a hard-coded 14, which does
    not depend on the number of sweeps written -- confirm that the consumer of
    these files does not rely on that number.
    """
    sweeps, start_level, step_level = create_cc_dataframe(in_name)

    with open(out_name, 'w') as handle:
        handle.write('ATF\t1.0\n0\t14\n')
        sweeps.to_csv(handle, sep='\t')

    return start_level, step_level

def save_vc_file(in_name, out_name):
    """Write the trace from ``create_vc_dataframe(in_name)`` to ``out_name``.

    The output is a fixed two-line ``ATF`` header followed by the frame as
    tab-separated text.

    Returns
    -------
    bool
        The certainty flag returned by ``create_vc_dataframe`` (``False`` when
        the channel choice was ambiguous).
    """
    averaged, channel_is_certain = create_vc_dataframe(in_name)

    with open(out_name, 'w') as handle:
        handle.write('ATF\t1.0\n0\t2\n')
        averaged.to_csv(handle, sep='\t')

    return channel_is_certain

def make_folders(folder):
    """Ensure the Cclamp/, Vclamp/ and Aclamp/ output folders for ``folder`` exist.

    Creates ``<home_dir>/<clamp>/<folder>`` for each clamp type.  Missing
    parent directories are created as well, and existing directories are left
    untouched.

    Raises
    ------
    OSError
        If a directory cannot be created (e.g. permissions).  The previous
        ``mkdir`` subprocess failed silently in that case, deferring the error
        to the later file write.
    """
    for clamp in ('Cclamp', 'Vclamp', 'Aclamp'):
        os.makedirs('%s/%s/%s' % (home_dir, clamp, folder), exist_ok=True)

def create_cell(cellname, ccname, vcname, folder, acname=''):
    """Convert the ABF recordings of one cell into ATF files.

    Input files are ``<home_dir>/ABF/<name>.abf``.  Outputs are
    ``<home_dir>/Cclamp/<folder>/<cellname>_CC.atf``,
    ``<home_dir>/Vclamp/<folder>/<cellname>_VC.atf`` and, when ``acname`` is
    given, ``<home_dir>/Aclamp/<folder>/<cellname>_AC.atf``.  A recording whose
    name is empty is skipped.

    Parameters
    ----------
    cellname : str, stem of the output files.
    ccname, vcname, acname : str, ABF file stems; ``''`` means "no recording".
    folder : str, sub-folder passed to ``make_folders``.

    Returns
    -------
    tuple
        ``(sweepStart, sweepStep)`` from ``save_cc_file``, or ``(nan, nan)``
        when ``ccname`` is empty.
    """
    make_folders(folder)

    if len(ccname) > 0:
        cc_in = '%s/ABF/%s.abf' % (home_dir, ccname)
        cc_out = '%s/Cclamp/%s/%s_CC.atf' % (home_dir, folder, cellname)
        sweepStart, sweepStep = save_cc_file(cc_in, cc_out)
    else:
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        sweepStart, sweepStep = np.nan, np.nan

    if len(vcname) > 0:
        vc_in = '%s/ABF/%s.abf' % (home_dir, vcname)
        vc_out = '%s/Vclamp/%s/%s_VC.atf' % (home_dir, folder, cellname)
        confidence = save_vc_file(vc_in, vc_out)
    else:
        confidence = True

    if len(acname) > 0:
        ac_in = '%s/ABF/%s.abf' % (home_dir, acname)
        ac_out = '%s/Aclamp/%s/%s_AC.atf' % (home_dir, folder, cellname)
        save_ac_file(ac_in, ac_out)

    if not confidence:
        print(cellname, 'Uncertain about Vclamp Channel')

    return sweepStart, sweepStep

def remove_missing(df):
    """Drop cells whose referenced CC or VC ABF file is not on disk.

    For every row, a non-empty ``CCName`` / ``VCName`` is checked against
    ``<home_dir>/ABF/<name>.abf``.  Empty names are not checked.  Rows with at
    least one missing file are removed, and a tab-separated report
    (cell, file stem, file kind) is printed.

    Parameters
    ----------
    df : pandas.DataFrame with string columns ``CCName`` and ``VCName``,
        indexed by cell label.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when nothing is missing, otherwise the filtered rows.
    """
    missing = []
    drop = []
    for cell, row in df.iterrows():
        to_drop = False

        for name, kind in ((row.CCName, 'CC File'), (row.VCName, 'VC File')):
            if len(name) > 0 and not os.path.isfile('%s/ABF/%s.abf' % (home_dir, name)):
                missing.append((cell, name, kind))
                to_drop = True

        if to_drop:
            drop.append(cell)

    if len(drop) > 0:
        df = df.loc[~(df.index.isin(drop))]

        print('The following cells are missing the listed files, and have been dropped from the convesion:')
        # str(): cell labels are not guaranteed to be strings (e.g. numeric IDs
        # read from a spreadsheet), and str.join rejects non-strings.
        print('\n'.join('\t'.join(str(field) for field in miss) for miss in missing))

    return df

def _merge_into_reference(path, df_new, **read_kwargs):
    """Merge ``df_new`` into the tab-separated table stored at ``path``.

    The table is read with its first column as index and restricted to the
    columns of ``df_new`` (columns missing from the file are added as ``''``).
    Rows already present are overwritten with the values from ``df_new``, new
    rows are appended, and the result is written back to ``path``.
    """
    df_ref = pd.read_csv(path, sep='\t', header=0, index_col=0, **read_kwargs)
    for column in df_new.columns:
        if column not in df_ref.columns:
            df_ref[column] = ''
    # .copy() so the row assignment below targets an independent frame.
    df_ref = df_ref[df_new.columns].copy()

    overlap = df_new.index[df_new.index.isin(df_ref.index)]
    if len(overlap) > 0:
        df_ref.loc[overlap, :] = df_new.loc[overlap, :].values
    df_ref = pd.concat((df_ref, df_new.loc[~(df_new.index.isin(overlap))]), axis=0)

    df_ref.to_csv(path, sep='\t')


def save_cell_parameters(df_out, df_read):
    """Update the three reference tables under ``references/`` in place.

    * ``cell_parameters.tsv`` -- cell type, CC/VC/AC directory and file
      ending, category (from ``df_out``);
    * ``recording config_cc.tsv`` -- cell type, ``start (pA)``, ``step (pA)``
      (from ``df_out``);
    * ``ABF_Matches.tsv`` -- every column of ``df_read``, read as strings.

    Parameters
    ----------
    df_out : pandas.DataFrame indexed by cell label.  ``ac_directory`` and
        ``ac_ending`` are optional and written as ``''`` when absent; the
        other columns listed above are required.
    df_read : pandas.DataFrame indexed by cell label.

    Raises
    ------
    KeyError
        If ``df_out`` lacks ``celltype``, ``start (pA)`` or ``step (pA)``.
    """
    parameter_columns = ['celltype', 'cc_directory', 'cc_ending',
                         'vc_directory', 'vc_ending',
                         'ac_directory', 'ac_ending',
                         'category']
    # reindex instead of .loc: callers without AC recordings never create the
    # ac_* columns, and .loc raises KeyError on missing labels.
    _merge_into_reference('references/cell_parameters.tsv',
                          df_out.reindex(columns=parameter_columns, fill_value=''))

    _merge_into_reference('references/recording config_cc.tsv',
                          df_out.loc[:, ['celltype', 'start (pA)', 'step (pA)']])

    _merge_into_reference('references/ABF_Matches.tsv', df_read, dtype=str)
    
def convert_files(fname):
    """Convert every cell listed in ``references/<fname>`` from ABF to ATF.

    The list is an Excel sheet (``.xls`` / ``.xlsx``) or a tab-separated text
    file whose first column is the cell label.  It must contain the columns
    ``CellType``, ``Project``, ``CCName`` and ``VCName``; ``ACName`` is
    optional.  Cells with missing ABF files are dropped by ``remove_missing``.
    Cells whose conversion raises are reported and left out of the parameter
    tables.  The surviving cells are passed to ``save_cell_parameters``.

    Returns ``None``.
    """
    path = 'references/%s' % fname
    if fname.endswith(('.xls', '.xlsx')):
        df = pd.read_excel(path, header=0, index_col=0, dtype=str, engine="openpyxl")
    else:
        df = pd.read_csv(path, sep='\t', header=0, index_col=0, dtype=str)
    df = df.fillna('').astype(str)

    columns = ['CellType', 'Project', 'CCName', 'VCName']
    if 'ACName' in df.columns:
        columns.append('ACName')
    # .copy() so adding ACName below does not write into a slice of the input.
    df = df[columns].copy()
    if 'ACName' not in df.columns:
        df['ACName'] = ''
    df = remove_missing(df)
    if len(df) == 0:
        return

    df_out = df.copy()
    # Float from the start: the levels assigned below are floats and may be NaN.
    df_out['start (pA)'] = 0.
    df_out['step (pA)'] = 0.
    df_out['cc_ending'] = '_CC.atf'
    df_out['vc_ending'] = '_VC.atf'
    df_out['cc_directory'] = 'Cclamp/' + df.Project
    df_out['vc_directory'] = 'Vclamp/' + df.Project
    df_out['category'] = df_out.Project
    df_out['celltype'] = df_out.CellType

    # Always create the AC columns: save_cell_parameters selects them by name
    # even when no cell in this batch has an AC recording.
    df_out['ac_ending'] = ''
    df_out['ac_directory'] = ''
    has_file = [len(acname) > 0 for acname in df.ACName]
    if any(has_file):
        df_out.loc[has_file, 'ac_ending'] = '_AC.atf'
        df_out.loc[has_file, 'ac_directory'] = 'Aclamp/' + df_out.loc[has_file, 'Project']

    problems = []
    reasons = []
    for cell, row in df.iterrows():
        try:
            sweepStart, sweepStep = create_cell(cell, row.CCName, row.VCName, row.Project, acname=row.ACName)
            df_out.loc[cell, 'start (pA)'] = sweepStart
            df_out.loc[cell, 'step (pA)'] = sweepStep
        except Exception as error:
            # Best effort per cell, but keep the reason and let Ctrl-C through
            # (a bare except would also swallow KeyboardInterrupt).
            problems.append(cell)
            reasons.append('%s\t%s: %s' % (cell, type(error).__name__, error))

    df_out = df_out.loc[~df_out.index.isin(problems), :]
    save_cell_parameters(df_out, df)
    if len(problems) > 0:
        print('These cells had problems:\n%s' % '\n'.join(reasons))

    return

def fix_parameters():
    """Regenerate the CC/VC ATF files and parameter tables from ``ABF_Matches.tsv``.

    Every cell listed in ``references/ABF_Matches.tsv`` that still has a CC or
    VC ABF file under ``<home_dir>/ABF`` is considered.  A cell whose CC file
    is absent is reported as missing.  A cell whose conversion raises is
    reported as a problem.  Both kinds are left out of the tables handed to
    ``save_cell_parameters``.

    AC traces are not regenerated here.  The AC directory / ending columns are
    derived from ``ACName`` only so that the existing entries in the parameter
    table are carried over.

    Returns ``None``.
    """
    # dtype=str: the same file is read as strings elsewhere in this notebook.
    df_read = pd.read_csv('references/ABF_Matches.tsv', sep='\t', header=0, index_col=0, dtype=str)
    df_read = df_read.fillna('').astype(str)
    if 'ACName' not in df_read.columns:
        df_read['ACName'] = ''
    df_read = df_read[['CellType', 'Project', 'CCName', 'VCName', 'ACName']]

    df_out = df_read.copy()
    df_out['start (pA)'] = 0.
    df_out['step (pA)'] = 0.
    df_out['cc_ending'] = '_CC.atf'
    df_out['vc_ending'] = '_VC.atf'
    df_out['cc_directory'] = 'Cclamp/' + df_read.Project
    df_out['vc_directory'] = 'Vclamp/' + df_read.Project
    df_out['category'] = df_out.Project
    df_out['celltype'] = df_out.CellType

    # save_cell_parameters selects the AC columns by name, so they must exist.
    df_out['ac_ending'] = ''
    df_out['ac_directory'] = ''
    has_ac = [len(acname) > 0 for acname in df_out.ACName]
    if any(has_ac):
        df_out.loc[has_ac, 'ac_ending'] = '_AC.atf'
        df_out.loc[has_ac, 'ac_directory'] = 'Aclamp/' + df_out.loc[has_ac, 'Project']

    def abf_path(name):
        return '%s/ABF/%s.abf' % (home_dir, name)

    present_cc = [os.path.isfile(abf_path(CCName)) for CCName in df_out.CCName]
    present_vc = [os.path.isfile(abf_path(VCName)) for VCName in df_out.VCName]
    present = np.logical_or(present_cc, present_vc)
    df_out = df_out[present].copy()

    missing = []
    problems = []

    for cell, row in df_out.iterrows():
        cc_in = abf_path(row.CCName)
        vc_in = abf_path(row.VCName)
        cc_out = '%s/%s/%s%s' % (home_dir, row.cc_directory, cell, row.cc_ending)
        vc_out = '%s/%s/%s%s' % (home_dir, row.vc_directory, cell, row.vc_ending)

        if not os.path.isfile(cc_in):
            missing.append(cell)
            continue

        try:
            sweepStart, sweepStep = save_cc_file(cc_in, cc_out)
            df_out.loc[cell, 'start (pA)'] = sweepStart
            df_out.loc[cell, 'step (pA)'] = sweepStep
            if os.path.isfile(vc_in):
                save_vc_file(vc_in, vc_out)
        except Exception as error:
            # Keep going with the other cells, but say why this one failed.
            print('%s\t%s: %s' % (cell, type(error).__name__, error))
            problems.append(cell)

    df_out = df_out[~df_out.index.isin(problems)]
    df_out = df_out[~df_out.index.isin(missing)]
    df_read = df_read.loc[df_out.index]
    save_cell_parameters(df_out, df_read)

    print('Missing:%s' % '\n'.join(str(cell) for cell in missing))
    print('Problems:%s' % '\n'.join(str(cell) for cell in problems))

    return

In [3]:
%%time

# Convert every cell listed in references/Benlist.xlsx and update the reference tables.
convert_files('Benlist.xlsx')

  if value == "~SECTION~":










These cells had problems:
Mut_031
Mut_043
Mut_044
CPU times: user 23min 43s, sys: 3.96 s, total: 23min 47s
Wall time: 24min 4s


In [4]:
%%time

"""
run this notebook to regenerate all ATF files from ABFs. Takes a long time, do NOT run this
unless necessary
"""

# The call is left commented out; uncomment it to run the full regeneration described above.
#fix_parameters()
pass

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.53 µs
