In [41]:
# CREATED: 17-NOV-2022
# LAST EDIT: 22-NOV-2022
# AUTHOR: DUANE RINEHART, MBA (drinehart@ucsd.edu)

# IMPLEMENTS CONVERSION OF EXTRACELLULAR ELECTROPHYSIOLOGY DATASETS TO NEURODATA WITHOUT BORDERS (NWB) FORMAT
# REQUIREMENT BY GRANT TO UPLOAD DATA IN OPEN FORMAT TO PUBLICLY ACCESSIBLE REPOSITORY

import os, math, time, pynwb
from pathlib import Path, PurePath, PureWindowsPath
from datetime import datetime, timedelta
from dateutil.tz import tzlocal
import pandas as pd
from pathlib import PurePath

from ConvertIntanToNWB import convert_to_nwb

#################################################################
# APP CONSTANTS
# base_path: root folder that every input/output path in this script is built from
base_path = Path('/mnt/e/temp/Devor-gdrive/')
# excel_file: workbook listing the sessions to convert (read by load_data)
excel_file = 'input.xlsx'
infile = base_path / excel_file
# debug: when True, extra progress messages are printed
debug = True
#################################################################

def load_data():
    '''Read the session meta-data table from the input workbook.

    Only the columns named below are loaded from the sheet called "auto"
    of the module-level `infile`.

    Returns the resulting pandas DataFrame (one row per spreadsheet row).
    '''
    #headers needed for the NWB conversion
    required_columns = [
        'session_id',
        'age',
        'subject_description',
        'species',
        'genotype',
        'sex',
        'subject_strain',
        'subject_weight',
        'session_description',
        'subject_id',
        'pharmacology',
        'date_of_birth(YYYY-MM-DD)',
        'src_folder_directory',
        'stimulus_notes_include',
        'stimulus_notes_paradigm',
        'stimulus_notes_direct_electrical_stimulation',
        'stimulus_notes_direct_electrical_stimulation_paradigm',
        'pharmacology_notes_anesthetized_during_recording',
        'electrode_recordings',
        'electrode_recordings_type',
        'electrode_recordings_contact_material',
        'electrode_recordings_substrate',
    ]

    #just extract the columns/fields listed above
    return pd.read_excel(infile, sheet_name="auto", usecols=required_columns)


def get_measurements_data(src_folder_directory: str, channel_map_recordings_file: str, debug: bool = False):
    '''Load the table of electrode measurement recordings for one session.

    src_folder_directory: Windows-style relative folder of the session; its
        last path component is dropped and the remainder is appended to the
        module-level `base_path` to locate the recordings file.
    channel_map_recordings_file: name of the Excel (.xls) or .cfg file that
        lists the electrode measurement recordings.
    debug: when True, print the full path of the file being read.

    Returns a pandas DataFrame restricted to the known column names (columns
    absent from the file are silently ignored), or None when the file name is
    missing/blank or the file is not an .xls file (.cfg files are not
    supported yet).
    '''
    #A blank spreadsheet cell arrives from pandas as NaN (a float), which Path() rejects with TypeError
    if not isinstance(channel_map_recordings_file, str) or not channel_map_recordings_file.strip():
        return None

    if Path(channel_map_recordings_file).suffix.lower() != '.xls':
        #NEED PROCEDURES TO PROCESS .cfg FILES
        return None

    #Note: must use 'PureWindowsPath' for org dataset (base_directory)
    #N.B. must use Path to extract data
    base_directory = PureWindowsPath(src_folder_directory).parts[:-1] #remove last part of path
    input_filename = Path(base_path, *base_directory, channel_map_recordings_file)

    #headers I need
    lstNWBFields = ['date', 'runnum', 'array', 'set', 'mapping', 'ep', 'epFile', 'stim', 'stimType', 'stimCondition', 'stimLocation', 'stimFile', 'imaging', 'imagingFile', 'imagingType', 'imagingCond', 'imagingLocation', 'webcam', 'webcamFile', 'trialAvg', 'trialAvgType', 'trialAvgCond', 'animal', 'comment']

    if debug:
        print(f"READING ELECTRODE MEASUREMENT DATA FROM FILE: {input_filename}")

    #just extract columns/fields I need (the callable ignores headers that are not present)
    return pd.read_excel(input_filename, usecols=lambda x: x in lstNWBFields)


def process_electrode_measurements(input_filename: str, raw_src_folder_directory: str, stimulus_notes: str, debug: bool = False):
    '''Convert one electrode measurement recording from Intan to NWB.

    Captures input/output location path & filename and calls conversion script Intan -> NWB.

    input_filename: file name of the recording; it is looked up in the 'ephys'
        sub-folder of the session folder.
    raw_src_folder_directory: Windows-style relative folder of the session;
        its last path component is dropped to obtain the session folder.
    stimulus_notes: passed through unchanged to the converter.
    debug: when True, print the resolved source and destination paths.

    The destination is <base_path>/output/<session folder>/<input stem>.nwb;
    the destination folder is created if needed. Nothing is converted when the
    destination file already exists. Returns None.
    '''
    #Note: must use 'PureWindowsPath' for org dataset (raw_src_folder_directory)
    session_folder = PureWindowsPath(raw_src_folder_directory).parts[:-1]

    #file name without its extension; must be unique (str)
    session_description = str(PureWindowsPath(input_filename).with_suffix(''))

    source_path = Path(base_path, *session_folder, 'ephys', input_filename)
    output_dir = Path(base_path, 'output', *session_folder)
    dest_path = str(output_dir / (source_path.stem + '.nwb')) #path must be string for Intan converter
    os.makedirs(output_dir, exist_ok=True)

    ##################################################################################
    if debug:
        print(f'\tELECTRODE MEASUREMENT SOURCE FILE: {source_path}')
        print(f'\tELECTRODE MEASUREMENT DESTINATION FILE: {dest_path}')

    #Note: As of 21-NOV-2022, 'merge_files' does not work. Each file will need conversion and merging

    if os.path.isfile(dest_path):
        return #file conversion already completed

    #subject, surgery, pharmacology and manual_start_time are not available per recording yet (temp fix)
    convert_to_nwb(intan_filename=str(source_path),
                   nwb_filename=dest_path,
                   session_description=session_description,
                   blocks_per_chunk=1000,
                   use_compression=True,
                   compression_level=4,
                   lowpass_description='Unknown lowpass filtering process',
                   highpass_description='Unknown highpass filtering process',
                   merge_files=False,
                   subject=None,
                   surgery=None,
                   stimulus_notes=stimulus_notes,
                   pharmacology=None,
                   manual_start_time=None)


def get_subject(age, subject_description, genotype, sex, species, subject_id, subject_weight, date_of_birth, subject_strain):
    '''Build the pynwb Subject meta-data object for one session.

    age: a string is used as-is; any other value is converted with str() and
        prefixed with "P" (ISO 8601 Duration format).
    date_of_birth: object exposing to_pydatetime() (e.g. a pandas Timestamp).
        Only its year/month/day are kept, with the local time zone attached;
        when those parts are not integers the date of birth is stored as None.
    genotype: converted with str().
    Remaining arguments are passed to pynwb.file.Subject unchanged.

    Returns the pynwb.file.Subject instance.
    '''
    #NOTE(review): a str age is assumed to be already in ISO 8601 Duration format - confirm against input.xlsx
    subject_age = age if isinstance(age, str) else "P" + str(age)

    dob = date_of_birth.to_pydatetime() #convert pandas timestamp to python datetime format
    if all(isinstance(part, int) for part in (dob.year, dob.month, dob.day)):
        date_of_birth = datetime(dob.year, dob.month, dob.day, tzinfo=tzlocal())
    else:
        date_of_birth = None

    subject = pynwb.file.Subject(age=subject_age,
                                 description=subject_description,
                                 genotype=str(genotype),
                                 sex=sex,
                                 species=species,
                                 subject_id=subject_id,
                                 weight=subject_weight,
                                 date_of_birth=date_of_birth,
                                 strain=subject_strain
                                )
    return subject


# MAIN LOOP: one iteration per row of the input workbook.
# For each session: build the subject meta-data, convert the session recording
# to NWB, then convert every electrode measurement recording listed in the
# session's channel map file.
lstRecords = load_data().to_dict('records') #creates list of dictionaries

for cnt, dataset in enumerate(lstRecords):
    print(f"PROCESSING DATASET #{cnt+1}")
    print(f"\tsession_id: {dataset['session_id']}")

    age = dataset['age']
    subject_description = dataset['subject_description']
    genotype = dataset['genotype']
    sex = dataset['sex']
    species = dataset['species']
    subject_id = dataset['subject_id']
    subject_weight = dataset['subject_weight']
    date_of_birth = dataset['date_of_birth(YYYY-MM-DD)']
    subject_strain = dataset['subject_strain']

    #CONCATENATE STIMULUS NOTES
    stimulus_notes = 'NA'
    if dataset['stimulus_notes_include'] == 1: #1 (include) or 2 (do not include)
        stimulus_notes = "Stimulus paradigm: " + str(dataset['stimulus_notes_paradigm']) + "; "
        if dataset['stimulus_notes_direct_electrical_stimulation'] == 1:
            stimulus_notes += "Direct electrical stimulation paradigm: " + str(dataset['stimulus_notes_direct_electrical_stimulation_paradigm']) + "; "
    ##################################################################################
    subject = get_subject(age,
                          subject_description,
                          genotype,
                          sex,
                          species,
                          subject_id,
                          subject_weight,
                          date_of_birth,
                          subject_strain)
    ##################################################################################

    ##################################################################################
    # SESSION INPUT/OUTPUT LOCATIONS (both derived from base_path)
    session_id = dataset['session_id']
    output_filename = Path(session_id).with_suffix('.nwb') #wrong extension; replace with 'nwb'
    output_dir = Path(base_path, 'output')
    dest_path = str(PurePath(output_dir, output_filename)) #path must be string for Intan converter
    print(f'\tDESTINATION FILE: {dest_path}')

    src_path_supplement = PureWindowsPath(dataset['src_folder_directory'])

    input_filename = str(PurePath(base_path, src_path_supplement, session_id))
    print(f'\tINPUT FILE: {input_filename}')
    ##################################################################################

    ##################################################################################
    # PROCESS META-DATA
    session_description = dataset['session_description']
    pharmacology = None #NEED DESTINATION FIELD FOR NWB FILE
    if dataset['pharmacology_notes_anesthetized_during_recording'] == 1:
        pharmacology = dataset['pharmacology']

    surgery = None #NEED DESTINATION FIELD FOR NWB FILE
    manual_start_time = None #NEED DESTINATION FIELD FOR NWB FILE

    if not os.path.isfile(dest_path): #skip when file conversion already completed
        os.makedirs(output_dir, exist_ok=True) #destination folder must exist before the converter writes to it
        convert_to_nwb(intan_filename=input_filename,
                       nwb_filename=dest_path,
                       session_description=session_description,
                       blocks_per_chunk=1000,
                       use_compression=True,
                       compression_level=4,
                       lowpass_description='Unknown lowpass filtering process',
                       highpass_description='Unknown highpass filtering process',
                       merge_files=False,
                       subject=subject,
                       surgery=surgery,
                       stimulus_notes=stimulus_notes,
                       pharmacology=pharmacology,
                       manual_start_time=manual_start_time)
    ##################################################################################

    ##################################################################################
    # CREATE/CONVERT ELECTRODES TABLE(S)
    # capture from main input.xlsx (loop); LIST OF RECORDINGS IN .xls or .cfg FILE
    src_folder_directory = dataset['src_folder_directory']
    electrode_recordings = dataset['electrode_recordings']
    electrode_recordings_type = dataset['electrode_recordings_type'] #NEED DESTINATION FIELD FOR NWB FILE
    electrode_recordings_contact_material = dataset['electrode_recordings_contact_material'] #NEED DESTINATION FIELD FOR NWB FILE

    channel_map_recordings_file = electrode_recordings #Excel (or .cfg) containing locations of electrode measurement recordings

    #kept under its own name so the list of sessions being iterated (lstRecords) is never rebound
    measurement_table = get_measurements_data(src_folder_directory, channel_map_recordings_file, debug)
    if not isinstance(measurement_table, pd.DataFrame):
        print("ERROR READING CHANNEL MAP RECORDINGS FILE")
        continue

    ##################################################################################
    if debug:
        print(f'CREATING ELECTRODES TABLE FROM ENTRIES IN FILE: {channel_map_recordings_file}')

    for record in measurement_table.to_dict('records'): #one dictionary per row of the channel map file
        #rows without a recording file name (blank cell -> NaN, or column absent) cannot be converted
        ep_file = record.get('epFile')
        if not isinstance(ep_file, str):
            continue

        record_stimulus_notes = None
        if record.get('stim') == 1:
            record_stimulus_notes = record.get('stimType')

        #forward the debug flag so the per-recording source/destination paths are reported
        process_electrode_measurements(ep_file, src_folder_directory, record_stimulus_notes, debug)
    ##################################################################################


PROCESSING DATASET #1
	session_id: san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.rhd
	DESTINATION FILE: /mnt/e/temp/Devor-gdrive/output/san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.nwb
	INPUT FILE: /mnt/e/temp/Devor-gdrive/20191017_Chronic implant_SL2701_San4/20191107_San4_day21/ephys/san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.rhd
READING ELECTRODE MEASUREMENT DATA FROM FILE: /mnt/e/temp/Devor-gdrive/20191017_Chronic implant_SL2701_San4/20191107_San4_day21/20191107_San4_day21_EXP.xls
CREATING ELECTRODES TABLE FROM ENTRIES IN FILE: 20191107_San4_day21_EXP.xls
	ELECTRODE MEASUREMENT SOURCE FILE: /mnt/e/temp/Devor-gdrive/20191017_Chronic implant_SL2701_San4/20191107_San4_day21/ephys/san4_run03_optogen_N30_300au_5ms_ISI10s_centerofarray_isoflurane_191107_143024.rhd
	ELECTRODE MEASUREMENT DESTINATION FILE: /mnt/e/temp/Devor-gdrive/output/20191017_Chronic implant_SL2701_San4/20191107_San4_day21/san4_run03_op