From e9a5857800b8d09c1ab5e58bd8fb041fc48aac25 Mon Sep 17 00:00:00 2001 From: Abuzar Mahmood Date: Tue, 6 Feb 2024 10:45:30 -0500 Subject: [PATCH] Separate emg_data by dig-in to allow uneven taste deliveries --- blech_make_arrays.py | 72 +++++++++++---------- emg/emg_filter.py | 145 ++++++++++++++++++++++++++++++------------- 2 files changed, 136 insertions(+), 81 deletions(-) diff --git a/blech_make_arrays.py b/blech_make_arrays.py index 9264880..11c5095 100644 --- a/blech_make_arrays.py +++ b/blech_make_arrays.py @@ -21,15 +21,14 @@ def get_dig_in_data(hf5): return dig_in_pathname, dig_in_basename, dig_in_data def create_spike_trains_for_digin( - taste_starts_cutoff, - this_dig_in, + this_starts, durations, sampling_rate_ms, units, hf5, ): spike_train = [] - for this_start in this_dig_in: + for this_start in this_starts: spikes = np.zeros((len(units), durations[0] + durations[1])) for k in range(len(units)): # Get the spike times around the end of taste delivery @@ -61,27 +60,32 @@ def create_spike_trains_for_digin( hf5.flush() def create_emg_trials_for_digin( - taste_starts_cutoff, - this_dig_in, + this_starts, durations, sampling_rate_ms, emg_nodes, hf5, ): - emg_data = [] - for this_start in this_dig_in: - for this_emg in emg_nodes: - emg_data.append(this_emg[this_start - durations[0]*sampling_rate_ms:\ - this_start + durations[1]*sampling_rate_ms]) - emg_data = np.stack(emg_data, axis=0)*0.195 + emg_data = [[this_emg[this_start - durations[0]*sampling_rate_ms:\ + this_start + durations[1]*sampling_rate_ms] \ + for this_start in this_starts] + for this_emg in emg_nodes] + emg_data = np.stack(emg_data)*0.195 emg_data = np.mean( - emg_data.reshape((len(emg_data),-1, int(sampling_rate_ms))), + emg_data.reshape((*emg_data.shape[:2],-1, int(sampling_rate_ms))), axis = -1) + # Write out ind:name map for each node + ind_name_map = {i:node._v_name for i,node in enumerate(emg_nodes)} + str_dict = str(ind_name_map) + if '/emg_data/ind_electrode_map' in hf5: + hf5.remove_node('/emg_data','ind_electrode_map') + hf5.create_array('/emg_data', 'ind_electrode_map', np.array(str_dict)) + # And add emg_data to the hdf5 file hf5.create_group('/emg_data', dig_in_basename[i]) - spike_array = hf5.create_array( + hf5.create_array( f'/emg_data/{dig_in_basename[i]}', 'emg_array', np.array(emg_data)) hf5.flush() @@ -95,8 +99,6 @@ def create_emg_trials_for_digin( # Ask for the directory where the hdf5 file sits, and change to that directory # Get name of directory with the data files metadata_handler = imp_metadata(sys.argv) - dir_name = '/media/bigdata/NM43_2500ms_160515_104159_copy' - metadata_handler = imp_metadata([[],dir_name]) os.chdir(metadata_handler.dir_name) print(f'Processing: {metadata_handler.dir_name}') @@ -232,7 +234,7 @@ def create_emg_trials_for_digin( hf5.close() trial_info_frame.to_hdf(metadata_handler.hdf5_name, 'trial_info_frame', mode='a') hf5 = tables.open_file(metadata_handler.hdf5_name, 'r+') - csv_path = os.path.join(dir_name, 'trial_info_frame.csv') + csv_path = os.path.join(metadata_handler.dir_name, 'trial_info_frame.csv') trial_info_frame.to_csv(csv_path, index=False) # Get list of units under the sorted_units group. @@ -244,26 +246,24 @@ def create_emg_trials_for_digin( #============================================================# # NOTE: Calculate headstage falling off same way for all not "none" channels # Pull out raw_electrode and raw_emg data - if '/raw' in hf5: - raw_electrodes = [x for x in hf5.get_node('/','raw')] - else: - raw_electrodes = [] + + # If sorting hasn't been done, use only emg channels + # to calculate cutoff...don't need to go through all channels + if '/raw_emg' in hf5: raw_emg_electrodes = [x for x in hf5.get_node('/','raw_emg')] else: raw_emg_electrodes = [] - all_electrodes = [raw_electrodes, raw_emg_electrodes] - all_electrodes = [x for y in all_electrodes for x in y] - # If raw channel data is present, use that to calcualte cutoff - # This would explicitly be the case if only EMG was recorded - if len(all_electrodes) > 0: - all_electrode_names = [x._v_pathname for x in all_electrodes] - electrode_names = list(zip(*[x.split('/')[1:] for x in all_electrode_names])) + if len(raw_emg_electrodes) > 0: + emg_electrode_names = [x._v_pathname for x in raw_emg_electrodes] + electrode_names = list(zip(*[x.split('/')[1:] for x in emg_electrode_names])) - print('Calculating cutoff times') + print('Calculating cutoff times using following EMG electrodes...') + print(emg_electrode_names) + print('===============================================') cutoff_data = [] - for this_el in tqdm(all_electrodes): + for this_el in tqdm(raw_emg_electrodes): raw_el = this_el[:] # High bandpass filter the raw electrode recordings filt_el = get_filtered_electrode( @@ -302,8 +302,8 @@ def create_emg_trials_for_digin( 'recording_cutoff' ], ) - elec_cutoff_frame['electrode_type'] = all_electrode_names[0] - elec_cutoff_frame['electrode_name'] = all_electrode_names[1] + elec_cutoff_frame['electrode_type'] = electrode_names[0] + elec_cutoff_frame['electrode_name'] = electrode_names[1] # Write out to HDF5 hf5.close() @@ -357,11 +357,10 @@ def create_emg_trials_for_digin( hf5.create_group('/', 'spike_trains') # Pull out spike trains - for i, this_dig_in in zip(taste_digin_inds, taste_starts_cutoff): + for i, this_starts in zip(taste_digin_inds, taste_starts_cutoff): print(f'Creating spike-trains for {dig_in_basename[i]}') create_spike_trains_for_digin( - taste_starts_cutoff, - this_dig_in, + this_starts, durations, sampling_rate_ms, units, @@ -387,11 +386,10 @@ def create_emg_trials_for_digin( hf5.create_group('/', 'emg_data') # Pull out emg trials - for i, this_dig_in in zip(taste_digin_inds, taste_starts_cutoff): + for i, this_starts in zip(taste_digin_inds, taste_starts_cutoff): print(f'Creating emg-trials for {dig_in_basename[i]}') create_emg_trials_for_digin( - taste_starts_cutoff, - this_dig_in, + this_starts, durations, sampling_rate_ms, emg_nodes, diff --git a/emg/emg_filter.py b/emg/emg_filter.py index d93e16d..e9413cd 100644 --- a/emg/emg_filter.py +++ b/emg/emg_filter.py @@ -8,19 +8,33 @@ import shutil import glob import pandas as pd +import tables +import ast sys.path.append('..') from utils.blech_utils import imp_metadata # Get name of directory with the data files -metadata_handler = imp_metadata(sys.argv) +# metadata_handler = imp_metadata(sys.argv) +dir_name = '/media/bigdata/NM43_2500ms_160515_104159_copy' +metadata_handler = imp_metadata([[],dir_name]) dir_name = metadata_handler.dir_name os.chdir(dir_name) print(f'Processing : {dir_name}') +############################################################ # Load the data # shape : channels x tastes x trials x time -emg_data = np.load('emg_output/emg_data.npy') +# emg_data = np.load('emg_output/emg_data.npy') +with tables.open_file(metadata_handler.hdf5_name, 'r') as hf5: + emg_digins = hf5.list_nodes('/emg_data') + emg_digins = [x for x in emg_digins if 'dig_in' in x._v_name] + emg_digin_names = [x._v_name for x in emg_digins] + emg_data = [x.emg_array[:] for x in emg_digins] + map_array = hf5.get_node('/emg_data/ind_electrode_map').read() +ind_electrode_map = ast.literal_eval(str(map_array)[2:-1]) +# key = electrode_ind, value = index in array +inverse_map = {int(v.split('emg')[1]): k for k, v in ind_electrode_map.items()} info_dict = metadata_handler.info_dict params_dict = metadata_handler.params_dict @@ -49,16 +63,13 @@ wanted_rows = wanted_rows.sort_values('electrode_ind') wanted_rows.reset_index(inplace=True, drop=True) -print('Using electrodes :') -print(wanted_rows) -print() - # TODO: Ask about differencing pairs # Difference by CAR emg labels # If only 1 channel per emg CAR, do not difference if asked emg_car_groups = [x[1] for x in wanted_rows.groupby('CAR_group')] emg_car_names = [x.CAR_group.unique()[0] for x in emg_car_groups] -emg_car_inds = [x.index.values for x in emg_car_groups] +# emg_car_inds = [x.index.values for x in emg_car_groups] +emg_car_inds = [[inverse_map[x] for x in y.electrode_ind.values] for y in emg_car_groups] print('EMG CAR Groups with more than 1 channel will be differenced') print('EMG CAR groups as follows:') @@ -67,45 +78,76 @@ print() # TODO: This question can go into an EMG params file -# todo: Rename to diff_data at this stage # Bandpass filter the emg signals, and store them in a numpy array. # Low pass filter the bandpassed signals, and store them in another array # Take difference between pairs of channels -# Shape : Channels x Tastes x Trials x Time -emg_data_grouped = [emg_data[x] for x in emg_car_inds] -emg_diff_data = [] +# emg_data = List of arrays (per dig-in) of shape : channels x trials x time +# emg_data_grouped = list of lists +# outer list : emg CAR groups +# inner list : dig-ins +# element_array : channels x trials x time +emg_data_grouped = [[dat[x] for dat in emg_data] for x in emg_car_inds] +# Make sure all element arrays are 3D with shape: channels x trials x time for x in emg_data_grouped: - if len(x) > 1: - emg_diff_data.append(np.squeeze(np.diff(x,axis=0))) - elif len(x) > 2: - raise Exception("More than 2 per EMG CAR currently not supported") - else: - emg_diff_data.append(np.squeeze(x)) + for y in x: + if len(y.shape) < 3: + y = np.expand_dims(y, axis=0) + +emg_diff_data = [] +for this_car in emg_data_grouped: + this_car_diff = [] + for this_dig in this_car: + if len(this_dig) > 1: + this_car_diff.append(np.squeeze(np.diff(this_dig,axis=0))) + elif len(this_dig) > 2: + raise Exception("More than 2 per EMG CAR currently not supported") + else: + this_car_diff.append(np.squeeze(this_dig)) + emg_diff_data.append(this_car_diff) # Iterate over trials and apply frequency filter -iters = list(np.ndindex(emg_diff_data[0].shape[:-1])) emg_filt_list = [] emg_env_list = [] -for x in emg_diff_data: - emg_filt = np.zeros(x.shape) - emg_env = np.zeros(x.shape) - for this_iter in iters: - temp_filt = filtfilt(m, n, x[this_iter[0], this_iter[1]]) - emg_filt[this_iter[0], this_iter[1]] = temp_filt - emg_env[this_iter[0], this_iter[1]] = filtfilt(c, d, np.abs(temp_filt)) - emg_filt_list.append(emg_filt) - emg_env_list.append(emg_env) +for car_group in emg_diff_data: + this_car_filt = [] + this_car_env = [] + for dig_in in car_group: + emg_filt = np.zeros(dig_in.shape) + emg_env = np.zeros(dig_in.shape) + temp_filt = filtfilt(m, n, dig_in) + temp_env = filtfilt(c, d, np.abs(temp_filt)) + this_car_filt.append(temp_filt) + this_car_env.append(temp_env) + emg_filt_list.append(this_car_filt) + emg_env_list.append(this_car_env) + +# Iterate and check for signficant changes in activity +n_cars = len(emg_diff_data) +n_dig = len(emg_diff_data[0]) +trial_lens = [[len(x) for x in y] for y in emg_diff_data] + +ind_frame = pd.DataFrame( + dict( + car_group = [x for x in range(n_cars) for y in range(n_dig)], + dig_in = [y for x in range(n_cars) for y in range(n_dig)], + trial_len = [np.arange(trial_lens[x][y]) for x in range(n_cars) for y in range(n_dig)] + ) + ) +# Explode the trial_len column +ind_frame = ind_frame.explode('trial_len') sig_trials_list = [] -for i in range(len(emg_diff_data)): +for i, this_row in ind_frame.iterrows(): + this_ind = this_row.values + this_dat = emg_filt_list[this_ind[0]][this_ind[1]][this_ind[2]] ## Get mean and std of baseline emg activity, ## and use it to select trials that have significant post stimulus activity # sig_trials (assumed shape) : tastes x trials - pre_m = np.mean(np.abs(emg_filt_list[i][...,:pre_stim]), axis = (2)) - pre_s = np.std(np.abs(emg_filt_list[i][...,:pre_stim]), axis = (2)) + pre_m = np.mean(np.abs(this_dat[pre_stim])) + pre_s = np.std(np.abs(this_dat[pre_stim])) - post_m = np.mean(np.abs(emg_filt_list[i][...,pre_stim:]), axis = (2)) - post_max = np.max(np.abs(emg_filt_list[i][...,pre_stim:]), axis = (2)) + post_m = np.mean(np.abs(this_dat[pre_stim:])) + post_max = np.max(np.abs(this_dat[pre_stim:])) # If any of the channels passes the criteria, select that trial as significant # 1) mean post-stim activity > mean pre-stim activity @@ -119,19 +161,34 @@ sig_trials = mean_bool * std_bool sig_trials_list.append(sig_trials) +ind_frame['sig_trials'] = sig_trials_list + # NOTE: Currently DIFFERENT sig_trials for each channel # Save the highpass filtered signal, # the envelope and the indicator of significant trials as a np array # Iterate over channels and save them in different directories -for num,this_name in enumerate(emg_car_names): - #dir_path = f'emg_output/emg_channel{num}' - dir_path = f'emg_output/{this_name}' - if os.path.exists(dir_path): - shutil.rmtree(dir_path) - os.makedirs(dir_path) - # emg_filt (output shape): tastes x trials x time - np.save(os.path.join(dir_path, f'emg_filt.npy'), emg_filt_list[num]) - # env (output shape): tastes x trials x time - np.save(os.path.join(dir_path, f'emg_env.npy'), emg_env_list[num]) - # sig_trials (output shape): tastes x trials - np.save(os.path.join(dir_path, 'sig_trials.npy'), sig_trials_list[num]) +ind_frame.to_hdf(metadata_handler.hdf5_name, '/emd_data/emg_sig_trials') + +with tables.open_file(metadata_handler.hdf5_name, 'r+') as hf5: + for digin_ind, digin_name in enumerate(emg_digin_names): + digin_path = f'/emg_data/{digin_name}' + if f'{digin_path}/processed_emg' in hf5: + hf5.remove_node(f'{digin_path}/processed_emg') + hf5.create_group(f'{digin_path}', 'processed_emg') + for car_ind , this_car_name in enumerate(emg_car_names): + # emg_filt (output shape): tastes x trials x time + # np.save(os.path.join(dir_path, f'emg_filt.npy'), emg_filt_list[num]) + hf5.create_array( + f'{digin_path}/processed_emg', + f'{this_car_name}_emg_filt', + emg_filt_list[car_ind][digin_ind] + ) + # env (output shape): tastes x trials x time + # np.save(os.path.join(dir_path, f'emg_env.npy'), emg_env_list[num]) + hf5.create_array( + f'{digin_path}/processed_emg', + f'{this_car_name}_emg_env', + emg_env_list[car_ind][digin_ind] + ) + # sig_trials (output shape): tastes x trials + # np.save(os.path.join(dir_path, 'sig_trials.npy'), sig_trials_list[num])