In [7]:
import pandas as pd
import os
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
from glob import glob  
from pathlib import Path  
import copy

In [10]:
# Regions of interest whose contacts are collected by this notebook.
# Each base region name is registered three times: bare, and with the
# 'ctx-lh-' and 'ctx-rh-' prefixes (in that order), each mapped to its own
# empty list. The trailing comments are the abbreviations used by the
# notebook's author for each region.
roi_dict = {
    f"{prefix}{region}": []
    for region in (
        'caudalanteriorcingulate',   # dACC
        'rostralanteriorcingulate',  # rACC
        'insula',                    # insula
        'medialorbitofrontal',       # mOFC
        'postcentral',               # S1
        'superiorfrontal',           # dlPFC
    )
    for prefix in ('', 'ctx-lh-', 'ctx-rh-')
}

def find_files(path, suffix, sort_marker='NY'):
    """Recursively find files under ``path`` whose names end with ``suffix``.

    Parameters
    ----------
    path : str or path-like
        Directory to search; all of its sub-directories are searched as well.
    suffix : str
        Trailing part of the file name to match (e.g. 'AnatomicalRegions.txt').
    sort_marker : str, optional
        Substring used to build the sort key. Each result is ordered by the
        part of its path starting at the first occurrence of this marker.
        Defaults to 'NY'.

    Returns
    -------
    list of pathlib.Path
        Matching files, sorted as described above. A path that does not
        contain ``sort_marker`` is sorted by its full path string instead of
        raising ``ValueError``, so one unexpected file name cannot abort the
        whole search.
    """
    def _sort_key(file_name):
        # str.find returns -1 when the marker is absent; fall back to the
        # whole path rather than failing like str.index would.
        marker_pos = file_name.find(sort_marker)
        return file_name[marker_pos:] if marker_pos >= 0 else file_name

    matches = glob(f"{path}/**/*{suffix}", recursive=True)
    return [Path(match) for match in sorted(matches, key=_sort_key)]

def _read_anat_table(anat_file_path, n_pairs):
    """Parse an anatomical-regions file that has ``n_pairs`` probability/location pairs.

    The file is read as space-separated text without a header row, with the
    columns Contact, X, Y, Z followed by Probability_k / Location_k for
    k = 1..n_pairs.

    Returns a ``(df, prob_loc_cols)`` tuple. Raises ``pd.errors.ParserError``
    when the file does not provide all of the requested columns.
    """
    prob_loc_cols = []
    for k in range(1, n_pairs + 1):
        prob_loc_cols += [f'Probability_{k}', f'Location_{k}']
    names = ['Contact', 'X', 'Y', 'Z'] + prob_loc_cols

    # Open a fresh handle for every attempt: a handle that a failed read_csv
    # call has already consumed cannot be parsed again from the start.
    with anat_file_path.open() as f:
        df = pd.read_csv(f, names=names, sep=' ', usecols=range(len(names)), header=None)

    # Some pandas versions only warn about out-of-range usecols and return a
    # narrower frame; treat that as a layout mismatch as well.
    missing = [col for col in names if col not in df.columns]
    if missing:
        raise pd.errors.ParserError(f"{anat_file_path}: file has no data for columns {missing}")
    return df, prob_loc_cols


def get_likely_contacts(anat_file_path, roi_dict, min_probability):
    """Collect, per region of interest, the contacts located there with high probability.

    Parameters
    ----------
    anat_file_path : pathlib.Path
        Space-separated text file with one row per contact: contact name,
        three coordinates, then either four or three probability/location
        pairs. The four-pair layout is tried first, then the three-pair one.
    roi_dict : dict
        Its keys are the location names to look for; it is not modified.
    min_probability : float
        Threshold in percent. A pair is kept only when its probability is
        strictly greater than this value.

    Returns
    -------
    dict
        A new dict with the same keys as ``roi_dict``. Each value is a list of
        ``(contact, probability)`` tuples in file order, where ``probability``
        is the raw cell value from the file (e.g. '80.00%').

    Raises
    ------
    pandas.errors.ParserError
        If the file matches neither the four-pair nor the three-pair layout.
    """
    roi_dict_tmp = {roi: [] for roi in roi_dict}

    try:
        df, prob_loc_cols = _read_anat_table(anat_file_path, n_pairs=4)
    except pd.errors.ParserError:
        df, prob_loc_cols = _read_anat_table(anat_file_path, n_pairs=3)

    # Drop rows whose first field is a lone '%'.
    df = df[df.Contact != '%']

    prob_cols = prob_loc_cols[::2]
    loc_cols = prob_loc_cols[1::2]
    for _, row in df.iterrows():
        for prob, loc in zip(prob_cols, loc_cols):
            location = row[loc]
            if location not in roi_dict_tmp:
                continue
            # str() keeps this working if pandas parsed the cell as a number
            # or left it as NaN (NaN never compares greater than the threshold).
            if float(str(row[prob]).replace('%', '')) > min_probability:
                roi_dict_tmp[location].append((row['Contact'], row[prob]))
    return roi_dict_tmp

def save_to_excel(base_name, roi_dict_tmp, df, i, save_path):
    """Fill row ``i`` of ``df`` with one file's contacts and write ``df`` to Excel.

    Parameters
    ----------
    base_name : str
        Value stored in the 'file_name' column of row ``i``.
    roi_dict_tmp : dict
        Maps a column name to a list of ``(contact, probability)`` tuples,
        where ``probability`` is a string that may end in '%'.
    df : pandas.DataFrame
        Table that is modified in place.
    i : hashable
        Row label to write to.
    save_path : str or path-like
        Destination passed to ``DataFrame.to_excel``; the whole table is
        written (without the index) on every call.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    def _percent(pair):
        # Numeric value of the probability string, used for ranking.
        return float(pair[1].strip('%'))

    df.loc[i, 'file_name'] = base_name

    for roi_name, contacts in roi_dict_tmp.items():
        # Highest probability first.
        ranked = sorted(contacts, key=_percent, reverse=True)
        cell_parts = []
        for pair in ranked:
            cell_parts.append(', '.join(str(item) for item in pair))
        df.loc[i, roi_name] = ', '.join(cell_parts)

    df.to_excel(save_path, index=False)
    return df

In [None]:
# Input and output locations (relative paths).
save_path = "./electrode_coverage.xlsx"  # Excel file passed to save_to_excel
parent_data_path = "../../../Data/"      # folder handed to find_files for the recursive search

In [None]:
# Recursively collect every file under the data folder whose name ends with
# 'AnatomicalRegions.txt'.
anat_file_paths = find_files(path=parent_data_path, suffix='AnatomicalRegions.txt')

In [None]:
# Empty results table: a 'file_name' column followed by one column per ROI key.
# The processing loop fills it in one row per file.
df_cols = {'file_name': []}
df_cols.update(roi_dict)
df = pd.DataFrame(df_cols)

In [None]:
# For every anatomical-regions file: extract the contacts that fall in an ROI
# with a probability above 70 %, then record them as row i of the results table.
for i, file_path in enumerate(anat_file_paths):
    # Row label: the file name up to the '_STG_MTG_AnatomicalRegions.txt' suffix
    # (the whole file name when that suffix is absent).
    base_name = file_path.name.partition('_STG_MTG_AnatomicalRegions.txt')[0]
    print(f"Processing:\t{base_name}")

    # ROI -> [(contact, probability), ...] for this file
    roi_dict_tmp = get_likely_contacts(file_path, roi_dict, min_probability=70)

    # Update the table and rewrite the Excel file
    save_to_excel(base_name, roi_dict_tmp, df, i, save_path)