<a href="https://colab.research.google.com/github/dtabuena/Patch_Ephys/blob/main/abf_handling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

def catalogue_recs(file_loc,cell_id_order):
    """Build a catalogue DataFrame of the ABF recordings found under ``file_loc``.

    Every path returned by ``get_sub_files(file_loc)`` that contains ``'.abf'``
    becomes one row, indexed by that path (index name ``file_name``).  For each
    row the base name of the file is split on ``'_'``; the first
    ``len(cell_id_order)`` pieces are stored in the columns named by
    ``cell_id_order`` and, re-joined with ``'_'``, in ``cell_id``.  The file is
    then opened with ``pyabf.ABF`` and its ``protocol``, ``channelList`` and
    ``abfDateTimeString`` (column ``abf_timestamp``) are recorded.

    Rows whose path contains ``'.sta'`` are kept but left unfilled.  A row
    that fails at any step keeps whatever was filled in before the failure and
    the error is printed; processing continues with the next file.

    Args:
        file_loc: Folder handed to ``get_sub_files`` to locate recordings.
        cell_id_order: Sequence of column names, one for each leading
            ``'_'``-separated field of the file name.

    Returns:
        A tuple ``(abf_recordings_df, protocol_set)``: the catalogue sorted by
        file path, and a list of the distinct values in its ``protocol``
        column (``None`` is among them when any row was left unfilled).
    """
    n_id_fields = len(cell_id_order)
    file_list = [f for f in get_sub_files(file_loc) if '.abf' in f]

    abf_recordings_df = pd.DataFrame(data=file_list, columns=['file_name'])
    abf_recordings_df = abf_recordings_df.set_index('file_name')

    # Pre-create every metadata column so rows that fail still have all fields.
    metadata_cols = ['Recording_name', 'cell_id', *cell_id_order,
                     'protocol', 'abf_timestamp', 'channelList']
    for col in metadata_cols:
        abf_recordings_df[col] = None

    for row_filename in abf_recordings_df.index:
        if '.sta' in row_filename:
            continue
        try:
            base_name = os.path.basename(row_filename)
            abf_recordings_df.loc[row_filename,'Recording_name'] = base_name

            split_words = base_name.split('_')
            if len(split_words) < n_id_fields:
                raise ValueError(
                    f'expected at least {n_id_fields} "_"-separated fields '
                    f'in {base_name!r}')
            id_fields = split_words[:n_id_fields]
            abf_recordings_df.loc[row_filename,'cell_id'] = '_'.join(id_fields)
            for col, field in zip(cell_id_order, id_fields):
                abf_recordings_df.loc[row_filename,col] = field

            abf = pyabf.ABF(row_filename)
            abf_recordings_df.loc[row_filename,'protocol'] = abf.protocol
            # .at stores the list as a single cell value.
            abf_recordings_df.at[row_filename,'channelList'] = abf.channelList
            abf_recordings_df.at[row_filename,'abf_timestamp'] = abf.abfDateTimeString
        except Exception as err:
            # Best effort: report the file and the cause, then keep going.
            print(f'ERROR on :{row_filename}', repr(err))

    # 'file_name' is the index, so sort on the index.
    abf_recordings_df.sort_index(inplace=True)
    protocol_set = list(set(abf_recordings_df['protocol']))
    return abf_recordings_df, protocol_set



In [None]:
def reorg_abfs(look_up_file, new_dir='reorg', local_abf_dir='/content/my_ephys_data'):
    """Reorganize ABFs by clamp protocol, then zip and download the result.

    Every file found under ``local_abf_dir`` (searched recursively) is passed
    to ``abf_or_name`` and copied into ``/content/<new_dir>/<protocol>/``,
    where ``<protocol>`` is the ``.protocol`` attribute of the object that
    helper returns.  The output folder is recreated from scratch on each call,
    archived to ``/content/<new_dir>.zip`` and offered for download through
    ``google.colab.files``.

    Args:
        look_up_file: Currently unused; retained so existing call sites keep
            working.
        new_dir: Name of the output folder created under ``/content``.
        local_abf_dir: Root folder that is walked for recordings.

    Returns:
        None.

    NOTE(review): every file under ``local_abf_dir`` is handed to
    ``abf_or_name`` without filtering on extension -- confirm that helper
    copes with non-ABF files, or filter before calling it.
    """
    out_root = os.path.join('/content', new_dir)

    # Start from a clean output folder; a missing folder is not an error.
    shutil.rmtree(out_root, ignore_errors=True)
    os.makedirs(out_root)

    for subdir, _dirs, fils in os.walk(local_abf_dir):
        for file in fils:
            full_path = os.path.join(subdir, file)
            prot_name = abf_or_name(full_path).protocol
            # Source and destination are both anchored at out_root so the
            # copy does not depend on the current working directory.
            prot_dir = os.path.join(out_root, prot_name)
            os.makedirs(prot_dir, exist_ok=True)
            shutil.copyfile(full_path, os.path.join(prot_dir, file))

    archive_path = shutil.make_archive(out_root, 'zip', out_root)
    # Imported here because google.colab only exists inside a Colab runtime.
    from google.colab import files
    files.download(archive_path)
    return None

In [None]:
def get_sub_files(rootdir):
    """Return the path of every file under ``rootdir``, subfolders included.

    Paths are built by joining each visited folder with the file names
    ``os.walk`` reports for it, in the order ``os.walk`` yields them.
    """
    found = []
    for folder, _subfolders, names in os.walk(rootdir):
        for name in names:
            found.append(os.path.join(folder, name))
    return found


In [None]:
def cell_prot_lut(abf_recordings_df,protocol_set,csv_name='Protocol_LUT',download=True):
    """Write a cell-by-protocol look-up table of recording file names to CSV.

    The table has one row per distinct ``cell_id`` in ``abf_recordings_df``
    (sorted) and one column per entry of ``protocol_set``.  Each recording's
    base file name is placed at (its ``cell_id``, its ``protocol``); when
    several recordings share a cell/protocol pair the last one visited wins.

    Args:
        abf_recordings_df: Frame indexed by file path with ``cell_id`` and
            ``protocol`` columns.
        protocol_set: Column labels for the table.
        csv_name: Output path; ``'.csv'`` is appended when the name does not
            already contain it.
        download: Accepted but not used by the current implementation.

    Returns:
        The path the CSV was written to.
    """
    if '.csv' not in csv_name:
        csv_name = csv_name + '.csv'

    unique_cells = list(set(abf_recordings_df['cell_id']))
    lut_df = pd.DataFrame(index=unique_cells, columns=protocol_set).sort_index()

    for rec_path in abf_recordings_df.index:
        protocol = abf_recordings_df.loc[rec_path,'protocol']
        cell = abf_recordings_df.loc[rec_path,'cell_id']
        lut_df.at[cell,protocol] = os.path.basename(rec_path)

    lut_df.to_csv(csv_name)
    return csv_name

In [None]:
def _normalize_by_group(df, focus_col, params, group_means):
    """Return a copy of ``df`` whose ``params`` columns are divided by their group's baseline."""
    normalized_df = df.copy()
    # Capture the grouping labels up front so they stay intact even if
    # ``focus_col`` itself is one of the columns being normalised.
    groups = list(df[focus_col])
    for p in params:
        baselines = group_means[p].to_dict()
        new_vals = []
        for group, value in zip(groups, df[p]):
            # A group with no control rows has no baseline -> NaN result.
            base = baselines.get(group, np.nan)
            if base == 0:
                new_vals.append('zero baseline')
                continue
            try:
                new_vals.append(value / base)
            except TypeError:
                # None / non-numeric cell: nothing meaningful to normalise.
                new_vals.append(np.nan)
        # Replace the whole column at once so its dtype is re-inferred
        # (float normally, object when a 'zero baseline' marker is present).
        normalized_df[p] = new_vals
    return normalized_df


def daily_norm(control_df,test_df,focus_col,exclusions):
    """Normalise control and test measurements to the per-group control mean.

    Rows are grouped by their value in ``focus_col``.  For every group and
    every column of ``control_df`` not listed in ``exclusions`` the NaN-aware
    mean of the control values is computed (``None`` entries are skipped).
    Each control and test value is then divided by the mean of its own group.
    Where that mean is exactly 0 the cell is set to the string
    ``'zero baseline'``; where the group has no control mean, or the cell is
    not numeric, the result is NaN.

    Neither input frame is modified.

    Args:
        control_df: Control measurements; must contain ``focus_col``.
        test_df: Test measurements; must contain ``focus_col``.
        focus_col: Column whose values define the normalisation groups.
        exclusions: Column names that are neither averaged nor normalised.

    Returns:
        A tuple ``(control_df_norm, test_df_norm, control_means)`` where
        ``control_means`` is indexed by group (in order of first appearance
        in ``control_df``) with one column per averaged parameter.
    """
    params_to_avg = [c for c in control_df.columns if c not in exclusions]

    control_groups = list(control_df[focus_col])
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    unique_col_vals = list(dict.fromkeys(control_groups))
    control_means = pd.DataFrame(index=unique_col_vals, columns=params_to_avg,
                                 dtype=object)

    for ucv in unique_col_vals:
        row_bool = [ucv == g for g in control_groups]
        for p in params_to_avg:
            matching_param_vals = [v for v in control_df.loc[row_bool,p]
                                   if v is not None]
            if not matching_param_vals:
                continue  # nothing to average; the mean stays NaN
            try:
                control_means.at[ucv,p] = np.nanmean(matching_param_vals).item()
            except Exception as err:
                # Best effort: a column that cannot be averaged is reported
                # and left as NaN rather than aborting the whole table.
                print('Error: ',ucv,p,repr(err))
                print(matching_param_vals)

    control_df_norm = _normalize_by_group(control_df, focus_col,
                                          params_to_avg, control_means)
    # Only normalise parameters the test frame actually has.
    test_params = [p for p in params_to_avg if p in test_df.columns]
    test_df_norm = _normalize_by_group(test_df, focus_col,
                                       test_params, control_means)
    return control_df_norm, test_df_norm, control_means



# exclusions = ['Recording_name', 'Rec_date', 'Virus', 'GenoType', 'Sex',
#               'Age', 'Slice_Num', 'Cell_num', 'Cell_Type', 'protocol',
#               'abf_timestamp', 'channelList', 'Stim_Levels_(pA)', 'Spike_Counts',
#               'IV_Steady_State_(V_stim)', 'IV_Steady_State_(I_peak)', 'IV_Steady_State_(I_mean)',
#               'IV_Early_(range)','IV_Early_(I_peak)','IV_Early_(I_mean)','IV_Early_(V_stim)','IV_Steady_State_(range)' ]

# control_df = new_dfs['CA3xNEG']
# test_df = new_dfs['CA3xPOS']
# focus_col = 'Rec_date'

# _,_,control_means = daily_norm(control_df,test_df,focus_col,exclusions)

# display(control_means)