<a href="https://colab.research.google.com/github/dtabuena/Patch_Ephys/blob/main/abf_batching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pyabf
import os
import pandas as pd

In [36]:
def catalogue_recs(file_loc, file_naming_scheme):
    """Catalogue every ``.abf`` recording under *file_loc*, grouped by protocol.

    The directory tree is walked recursively. Each recording's base name, with
    its extension removed, is split on ``'_'``; the leading tokens are assigned
    in order to the field names in *file_naming_scheme* and re-joined with
    ``'_'`` to form the ``cell_id``.

    Args:
        file_loc: Root directory to search for ``.abf`` files.
        file_naming_scheme: Ordered sequence of field names describing the
            leading ``'_'``-separated tokens of each file name, e.g.
            ``['Rec_date', 'Region', 'Slice_Num']``.

    Returns:
        dict mapping each protocol name to a DataFrame indexed by
        ``file_name`` (full path) with one column per naming-scheme field plus
        ``cell_id``, ``protocol``, ``abf_timestamp`` and ``channelList``.
        Recordings that fail to load or parse are reported with ``print`` and
        keep a null protocol, so they do not appear in any group.
    """
    scheme = list(file_naming_scheme)  # accept any sequence, not only a list
    file_list = [
        os.path.join(root, f)
        for root, _, files in os.walk(file_loc)
        for f in files if f.endswith('.abf')
    ]

    abf_recordings_df = pd.DataFrame(file_list, columns=['file_name']).set_index('file_name')
    for col in scheme + ['cell_id', 'protocol', 'abf_timestamp', 'channelList']:
        abf_recordings_df[col] = None

    for filename in abf_recordings_df.index:
        try:
            abf = pyabf.ABF(filename)
        except (pyabf.ABFException, FileNotFoundError) as e:
            print(f'ABF import error ({filename}): {e}')
            continue

        # Drop the extension before splitting so '.abf' can never end up in
        # the last naming-scheme field or in the cell id.
        stem = os.path.splitext(os.path.basename(filename))[0]
        split_words = stem.split('_')
        if len(split_words) < len(scheme):
            print(
                f'Data handling error ({filename}): expected at least '
                f'{len(scheme)} "_"-separated fields, found {len(split_words)}'
            )
            continue

        try:
            abf_recordings_df.at[filename, 'cell_id'] = '_'.join(split_words[:len(scheme)])

            for field, value in zip(scheme, split_words):
                abf_recordings_df.at[filename, field] = value

            abf_recordings_df.at[filename, 'protocol'] = abf.protocol
            abf_recordings_df.at[filename, 'abf_timestamp'] = abf.abfDateTime
            abf_recordings_df.at[filename, 'channelList'] = abf.channelList
        except (IndexError, AttributeError, ValueError) as e:
            print(f'Data handling error ({filename}): {e}')
            continue

    abf_recordings_df.sort_index(inplace=True)
    # groupby skips null keys, which is what removes the failed recordings.
    protocol_groups = {protocol: df for protocol, df in abf_recordings_df.groupby('protocol')}

    return protocol_groups



# dataset = {'data_name': 'data',
#            'data_source': r"D:\Dropbox (Gladstone)\Gladstone Dropbox\Dennis Tabuena\0_Projects\_NPRS_Project\Recs",
#            'file_naming_scheme': ['Rec_date','Region','Slice_Num'],
#            }

# protocol_groups = catalogue_recs(dataset['data_source'],dataset['file_naming_scheme'])



In [42]:
def cell_prot_lut(protocol_groups, csv_name='Protocol_LUT.csv'):
    """Write a flat lookup table of recordings, cells and protocols to CSV.

    Args:
        protocol_groups: dict mapping a protocol name to a DataFrame whose
            index is named ``file_name`` and which has a ``cell_id`` column
            (the structure returned by ``catalogue_recs``).
        csv_name: Path of the CSV file to write.

    Returns:
        ``csv_name``, unchanged, after the file has been written.

    The table has one row per recording, sorted by ``cell_id`` then
    ``protocol``. ``file_name`` is reduced to its base name and the columns
    are ordered ``file_name``, ``cell_id``, ``protocol`` followed by the
    remaining columns. An empty *protocol_groups* produces a header-only file
    instead of an error.
    """
    # Imported locally so the function still works when run on its own.
    import pandas as pd
    import os

    leading = ['file_name', 'cell_id', 'protocol']

    frames = []
    for protocol, df in protocol_groups.items():
        df = df.copy()  # never mutate the caller's frames
        df.reset_index(inplace=True)  # to keep filename as column
        df['protocol'] = protocol
        df['file_name'] = df['file_name'].apply(os.path.basename)
        frames.append(df)

    if frames:
        lut_df = pd.concat(frames, ignore_index=True)
        trailing = [col for col in lut_df.columns if col not in leading]
        lut_df = lut_df[leading + trailing].sort_values(by=['cell_id', 'protocol'])
    else:
        # pd.concat raises ValueError on an empty list; write just the header.
        lut_df = pd.DataFrame(columns=leading)

    lut_df.to_csv(csv_name, index=False)

    return csv_name



ValueError: Index contains duplicate entries, cannot reshape