In [648]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path: sys.path.append(module_path)

import pandas as pd
import json
from tqdm import tqdm
import pickle
from pathlib import Path

from RMALoaders import *

### Helper Functions

In [649]:
def mkdir(path):
    """
    Creates the directory `path` (together with any missing parent directories) if it does not exist yet.

        Parameters:
            path (str or os.PathLike): directory to create.

        Returns:
            path: the object that was passed in, unchanged, so the call can be used inline.

        Raises:
            FileExistsError: if `path` already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate existence check,
    # which could race with another process creating the directory in between
    os.makedirs(path, exist_ok=True)
    return path

In [650]:
# All inputs and outputs of this notebook live under <home>/Desktop/data/connectivity
desktop_path = Path.home().joinpath('Desktop')
connectivity_path = desktop_path.joinpath('data', 'connectivity')
print(connectivity_path)

/home/ikharitonov/Desktop/data/connectivity


### Main Parameters

In [651]:
# Target structure: experiments projecting to this structure are analysed.
# Its acronym is also used to build the names of the input csv and of all output files/folders.
main_structure = 'VISam'
main_structure_object = RMAStructure(acronym=main_structure)
main_structure_id = main_structure_object.id

# 1 = left, 2 = right (same convention as the return value of get_hemisphere_from_z_coordinate)
HEMISPHERE_TO_FILTER = 1 # only experiments injected in this hemisphere will be selected

# Column of the unionized data used to compare ipsi-/contralateral projections and to weight the centroids
projection_metric = 'projection_energy' # for later export into brainrender

# Experiments whose injection 'volume' in the source area is below this value are dropped
INJECTION_VOLUME_THRESHOLD = 0.01

# Experiments whose thresholding metric in the target structure is below this value are dropped
PROJECTION_VOLUME_THRESHOLD = 0.1

READ_UNIONIZED_DATA = False # True: read previously downloaded unionized csv files into memory; False: download the missing ones
READ_EXPERIMENT_LIST = False # read area-experiment_id dictionary from existing file

### Loading csv file with metadata of experiments projecting to the structure of interest

In [652]:
# Metadata of all experiments projecting to the target structure, one row per experiment
filename = f'{main_structure}.csv'
metadata_folder = connectivity_path / 'connectivity_target_experiment_lists'
df = pd.read_csv(metadata_folder / filename)
df

Unnamed: 0,id,transgenic-line,product-id,structure-id,structure-abbrev,structure-name,name,injection-volume,injection-structures,gender,strain,sum,structure-color,num-voxels,injection-coordinates,selected,experiment_page_url
0,512314723,Emx1-IRES-Cre,35,533,VISpm,posteromedial visual area,Emx1-IRES-Cre-234273,0.275993,"[{""id""=>385, ""abbreviation""=>""VISp"", ""name""=>""...",M,,4.011345e-01,08858c,,"[8480, 510, 4080]",False,http://connectivity.brain-map.org/projection/e...
1,100141599,,5,394,VISam,Anteromedial visual area,378-757,0.125483,"[{""id""=>394, ""abbreviation""=>""VISam"", ""name""=>...",M,C57BL/6J,2.757702e-01,08858c,,"[7900, 580, 7380]",False,http://connectivity.brain-map.org/projection/e...
2,126861679,,5,394,VISam,Anteromedial visual area,378-1348,0.169237,"[{""id""=>394, ""abbreviation""=>""VISam"", ""name""=>...",M,C57BL/6J,2.730652e-01,08858c,,"[7350, 1200, 7320]",False,http://connectivity.brain-map.org/projection/e...
3,115958825,,5,1027,AUDpo,Posterior auditory area,378-1185,0.210045,"[{""id""=>402, ""abbreviation""=>""VISal"", ""name""=>...",M,C57BL/6J,2.720842e-01,019399,,"[8150, 2510, 9640]",False,http://connectivity.brain-map.org/projection/e...
4,100148503,,5,394,VISam,Anteromedial visual area,378-897,0.088840,"[{""id""=>394, ""abbreviation""=>""VISam"", ""name""=>...",M,C57BL/6J,2.172984e-01,08858c,,"[7420, 1180, 7290]",False,http://connectivity.brain-map.org/projection/e...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2724,113935285,,5,463,CA3,Field CA3,378-1153,0.347883,"[{""id""=>463, ""abbreviation""=>""CA3"", ""name""=>""F...",M,C57BL/6J,2.206655e-10,7ed04b,,"[8500, 4310, 8910]",False,http://connectivity.brain-map.org/projection/e...
2725,301327022,Nos1-CreERT2,5,726,DG,Dentate gyrus,Nos1-CreERT2-126106,0.051096,"[{""id""=>382, ""abbreviation""=>""CA1"", ""name""=>""F...",M,B6.129,1.946178e-10,7ed04b,,"[7370, 2410, 6440]",False,http://connectivity.brain-map.org/projection/e...
2726,575683857,Ai75(RCL-nT),44,972,PL,Prelimbic area,Ai75(T503)-296477,0.018484,"[{""id""=>39, ""abbreviation""=>""ACAd"", ""name""=>""A...",F,unknown,1.709580e-10,2fa850,,"[3450, 3470, 5410]",False,http://connectivity.brain-map.org/projection/e...
2727,112936582,,5,345,SSp-m,"Primary somatosensory area, mouth",378-1079,0.178009,"[{""id""=>345, ""abbreviation""=>""SSp-m"", ""name""=>...",M,C57BL/6J,1.499148e-10,188064,,"[4470, 2700, 9140]",False,http://connectivity.brain-map.org/projection/e...


In [653]:
# Example query: unionized data of a single experiment, restricted to a single structure
# (the result holds one row per hemisphere_id, see the output below)
# https://allensdk.readthedocs.io/en/latest/_modules/allensdk/api/queries/mouse_connectivity_api.html#MouseConnectivityApi.get_structure_unionizes
# using this approach, unionized data would have to be downloaded for every experiment (might take a lot of time e.g. 1855 experiments for RSPagl)
RMAUnionizedData(experiment_id=512315551, select_structure_id=894).data

Unnamed: 0,hemisphere_id,id,is_injection,max_voxel_density,max_voxel_x,max_voxel_y,max_voxel_z,normalized_projection_volume,projection_density,projection_energy,projection_intensity,projection_volume,section_data_set_id,structure_id,sum_pixel_intensity,sum_pixels,sum_projection_pixel_intensity,sum_projection_pixels,volume,structure
0,2,640689589,False,0.696321,6920,350,6900,0.01324,0.016449,9.820845,597.049358,0.021373,512315551,894,121644300000.0,1060688000.0,10416850000.0,17447220.0,1.299343,"{'acronym': 'RSPagl', 'atlas_id': 394, 'color_..."
1,1,640690930,False,1.0,9720,1630,3410,0.068989,0.099411,206.254527,2074.762553,0.111363,512315551,894,332724100000.0,914473200.0,188614200000.0,90908830.0,1.12023,"{'acronym': 'RSPagl', 'atlas_id': 394, 'color_..."
2,3,640692612,False,1.0,9720,1630,3410,0.082229,0.054859,100.767021,1836.824888,0.132736,512315551,894,454368300000.0,1975161000.0,199031100000.0,108356100.0,2.419572,"{'acronym': 'RSPagl', 'atlas_id': 394, 'color_..."


### Get the reference list of brain areas

In [654]:
# Id of the structure set used as the reference list of brain areas throughout the notebook
struct_set_id = 167587189 # Curated list of non-overlapping substructures at a mid-ontology level

# struct_set is a table with one row per area; its 'id' column is matched against 'structure-id' of the experiments below
structure_sets = RMAStructureSet()
struct_set = structure_sets.get_structure_set(id=struct_set_id)
struct_set

Unnamed: 0,acronym,atlas_id,color_hex_triplet,depth,failed,failed_facet,graph_id,graph_order,hemisphere_id,id,...,neuro_name_structure_id,neuro_name_structure_id_path,ontology_id,parent_structure_id,safe_name,sphinx_id,st_level,structure_id_path,structure_name_facet,weight
0,PAG,240.0,FF90FF,5,False,734881840,1,838,3,795,...,,,1,323,Periaqueductal gray,839,8,/997/8/343/313/323/795/,3260726339,8690
1,ARH,27.0,FF5D50,6,False,734881840,1,733,3,223,...,,,1,157,Arcuate hypothalamic nucleus,734,8,/997/8/343/1129/1097/157/223/,218062747,8690
2,ORBm,232.0,248A5E,7,False,734881840,1,264,3,731,...,,,1,714,Orbital area medial part,265,9,/997/8/567/688/695/315/714/731/,3012751712,8690
3,LSv,174.0,90CBED,7,False,734881840,1,589,3,266,...,,,1,242,Lateral septal nucleus ventral part,590,9,/997/8/567/623/477/275/242/266/,1660459064,8690
4,PD,255.0,FF5547,6,False,734881840,1,746,3,914,...,,,1,141,Posterodorsal preoptic nucleus,747,8,/997/8/343/1129/1097/141/914/,2759126254,8690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,SSp-bfd,748.0,188064,8,False,734881840,1,51,3,329,...,,,1,322,Primary somatosensory area barrel field,52,9,/997/8/567/688/695/315/453/322/329/,3406319794,8690
312,OT,235.0,80CDF8,6,False,734881840,1,577,3,754,...,,,1,493,Olfactory tubercle,578,8,/997/8/567/623/477/493/754/,1598442672,8690
313,SubG,464.0,FF909F,7,False,734881840,1,710,3,321,...,,,1,1014,Subgeniculate nucleus,711,8,/997/8/343/1129/549/856/1014/321/,3545734096,8690
314,SNr,330.0,FF90FF,5,False,734881840,1,822,3,381,...,,,1,323,Substantia nigra reticular part,823,8,/997/8/343/313/323/381/,1375238552,8690


### Check that all projecting experiments are linked to an area in the list

In [655]:
# SANITY CHECK
# Every experiment in df should have its 'structure-id' among the areas of the reference set.

# row labels of the experiments matching each area of the reference set
matched_index_per_area = [df.index[df['structure-id']==area_id] for area_id in struct_set['id']]
num_collected_exps = sum(len(matched) for matched in matched_index_per_area)

# start from a copy of all experiments and remove the matched ones area by area
unmatched_df = df.copy()
for matched in matched_index_per_area:
    unmatched_df = unmatched_df.drop(matched)

# number of matched experiments, followed by the table of experiments that matched no area
print(num_collected_exps)
unmatched_df

2729


Unnamed: 0,id,transgenic-line,product-id,structure-id,structure-abbrev,structure-name,name,injection-volume,injection-structures,gender,strain,sum,structure-color,num-voxels,injection-coordinates,selected,experiment_page_url


dataframe is empty

Sanity check passed for structure set 167587189: every experiment in the metadata csv (downloaded into `connectivity_target_experiment_lists`) is assigned to an area of the set, so no experiments are missed because of a mismatch in the injection-area labelling convention.

### Collect experiments in {area_id: [exp_id_1, exp_id_2, ...]} dictionary

In [656]:
# Map every area of the reference set to the ids of the experiments injected into it.
# Areas without any experiment keep an empty list.
total_num_collected_exps = 0
experiment_list = {}

for area_id in struct_set['id']:
    area_experiment_ids = list(df.loc[df['structure-id']==area_id, 'id'])
    experiment_list[area_id] = area_experiment_ids
    total_num_collected_exps += len(area_experiment_ids)

print(total_num_collected_exps,"experiments collected in the dictionary")
print(len(experiment_list),"number of areas collected in the dictionary")

# show the content of two areas as an example
for example_area in (795, 223):
    print("area",example_area)
    print(experiment_list[example_area])

2729 experiments collected in the dictionary
316 number of areas collected in the dictionary
area 795
[300076066, 267029447, 496114558, 266500714, 287247978, 272829745, 272699357, 266099165, 302053755, 300166697, 182144176, 113096571, 120761491, 301540850, 128002057, 158376179, 182280207, 304949216, 156979283, 300111793, 162020630, 543880631, 262188772, 301671287, 299856390, 147635309, 180524412, 298079928, 160538548, 120571672, 287712779]
area 223
[263369222, 286726777, 175738378, 241278553, 158142090, 232310521, 181891892, 586447435, 146554676, 171482142, 146660999, 176431817, 178282527, 286318327, 232311236, 298105299, 159751184]


### Filter out experiments where target structure and injection structures overlap

In [657]:
# Drop every experiment whose injection structures include the target structure itself

def _injection_structure_ids(exp_id):
    """Returns the ids of all injection structures listed in the metadata row of experiment `exp_id`."""
    raw = df.loc[df['id']==exp_id, 'injection-structures'].item()
    # the metadata stores the list with "=>" between keys and values; turn it into valid JSON first
    return [struct['id'] for struct in json.loads(raw.replace("=>",":"))]

experiment_list_inj_structs_removed = {}
exps_removed = []

for area_id, exps in experiment_list.items():
    kept_exp_ids = []
    for exp_id in exps:
        overlaps_target = main_structure_id in _injection_structure_ids(exp_id)
        (exps_removed if overlaps_target else kept_exp_ids).append(exp_id)
    experiment_list_inj_structs_removed[area_id] = kept_exp_ids

print(len(exps_removed),"experiments removed:")
print(exps_removed)

97 experiments removed:
[298003295, 646525156, 562674923, 575782182, 112229103, 557187751, 576341623, 593018150, 576036240, 646525997, 553746532, 617901499, 557827228, 604100536, 572595932, 577298618, 643635656, 585021827, 595259180, 554333581, 657042668, 656632388, 597256577, 511817919, 646527844, 584511119, 100141599, 126861679, 100148503, 159753308, 184167484, 297233422, 571100135, 156671933, 294434867, 297670312, 267657327, 518742338, 287601808, 605661910, 288264047, 528510546, 552431726, 590548119, 517326050, 566244185, 524667618, 309515141, 560045081, 561986735, 518619451, 591168591, 516491813, 268038969, 559878074, 523714940, 478678606, 293821389, 613898292, 528963283, 651703553, 606930364, 666936463, 560724955, 294533406, 157769139, 653191449, 278175580, 597007143, 267761438, 168165712, 586041882, 552279683, 557347149, 520012330, 591394174, 535696750, 175018829, 267493760, 572588941, 590987294, 502074651, 182896517, 561506791, 297946154, 287225041, 570460301, 557342452, 6368039

### Save / load unionized data in csv files for every experiment

In [658]:
def load_unionized_data(experiment_dict):
    """
    Reads the previously saved unionized csv file of every experiment into memory.

        Parameters:
            experiment_dict (dict): {area_id: [experiment_id, ...]}.

        Returns:
            area_experiment_unionized_data (dict): {area_id: {experiment_id: pandas.DataFrame}}.
    """
    data_folder = connectivity_path / f'{main_structure}_unionized_data'
    loaded = {}
    for area, experiments in tqdm(experiment_dict.items(),'Loading'):
        loaded[area] = {
            e: pd.read_csv(data_folder / f'area_{area}_experiment_{e}.csv')
            for e in experiments
        }
    return loaded

def download_unionized_data(experiment_dict):
    """
    Downloads the unionized data of every experiment and stores it as one csv file per experiment.

    Files that already exist are not downloaded again, so an interrupted run can simply be restarted.

        Parameters:
            experiment_dict (dict): {area_id: [experiment_id, ...]}.

        Returns:
            None
    """
    folderpath = mkdir(connectivity_path / f'{main_structure}_unionized_data')
    for area, experiments in tqdm(experiment_dict.items(),'Downloading'):
        for e in experiments:
            target = folderpath / f'area_{area}_experiment_{e}.csv'
            # Skip if it already was downloaded
            if os.path.isfile(target): continue
            temp_data = RMAUnionizedData(experiment_id=e).data
            # Write under a temporary name and rename afterwards, so an interrupted run never
            # leaves a truncated csv that a later run would mistake for a finished download
            partial = folderpath / f'area_{area}_experiment_{e}.csv.part'
            temp_data.to_csv(partial)
            os.replace(partial, target)

In [None]:
# Either read the csv files that are already on disk, or download the missing ones
# (downloading does not load anything into memory).
if READ_UNIONIZED_DATA:
    area_experiment_unionized_data = load_unionized_data(experiment_list_inj_structs_removed)
else:
    download_unionized_data(experiment_list_inj_structs_removed)

Downloading:  19%|█▉        | 60/316 [1:15:51<2:28:42, 34.86s/it]  

### Checking the hemisphere of injection for each experiment

#### average template dimensions (from https://www.sciencedirect.com/science/article/pii/S0092867420304025)

<img src="https://global.discourse-cdn.com/standard10/uploads/brainobservatory/original/1X/44f3499fd49d9396d9d12597725afd41693582f5.png" alt="coordinate_system" width=900 />

13.2 mm x 8.0 mm x 11.4 mm

13200 µm x 8000 µm x 11400 µm

In [None]:
def get_hemisphere_from_z_coordinate(unionized_data, midline_z=5700):
    """
    Returns the hemisphere in which the 'max_voxel_z' coordinate of the passed unionized data lies.

    If the rows hold more than one distinct 'max_voxel_z', the row with the largest 'volume' is used.

        Parameters:
            unionized_data (pandas.DataFrame): unionized data with a single structure selected. Needs the columns 'max_voxel_z' and 'volume'; any index is accepted.
            midline_z (int): z coordinate separating the hemispheres. The default is half of the 11400 µm template width.

        Returns:
            hemisphere_id (int): 1 for left hemisphere (z < midline_z) and 2 for right hemisphere (z >= midline_z). If there is no row, or the selected z coordinate is missing, 0 is returned.
    """
    z_values = unionized_data['max_voxel_z'].unique()
    if len(z_values) == 0: return 0

    if len(z_values) > 1:
        # idxmax returns an index label; look the row up by position on a fresh 0..n-1 index
        # so the result does not depend on the index of the frame that was passed in
        row_position = unionized_data['volume'].reset_index(drop=True).idxmax()
        z = unionized_data['max_voxel_z'].iloc[row_position]
    else:
        z = z_values[0]

    # a missing coordinate compares False against everything, so it has to be handled explicitly
    if pd.isna(z): return 0
    return 1 if z < midline_z else 2

def check_hemisphere(experiment_id, structure_id):
    """
    Queries the injection rows of one experiment for one structure and returns the hemisphere of injection.

        Parameters:
            experiment_id (int): id of the experiment (section_data_set_id).
            structure_id (int): id of the injection structure.

        Returns:
            hemisphere_id (int): result of get_hemisphere_from_z_coordinate on the queried rows, i.e. 1 for left hemisphere, 2 for right hemisphere and 0 if the query returned no rows.
    """
    injection_query = RMAUnionizedData(
        experiment_id=experiment_id,
        is_injection=True,
        select_structure_id=structure_id,
    )
    injection_rows = injection_query.data.reset_index(drop=True)
    return get_hemisphere_from_z_coordinate(injection_rows)

### Removing experiments which have not been injected into specified hemisphere

In [None]:
# Removing all experiments that were not injected in the specified hemisphere

filename = f'{main_structure}_experiment_list_filtered_by_hemisphere_{HEMISPHERE_TO_FILTER}.pkl'

if READ_EXPERIMENT_LIST:
    # Reading from a file
    # NOTE: unpickling can execute arbitrary code; only read files written by this notebook
    with open(connectivity_path / filename, 'rb') as f: experiment_list_filtered_by_hemisphere = pickle.load(f)
else:
    # Build new lists instead of popping from the list that is being iterated:
    # removing an element during iteration makes the loop skip the element that follows it,
    # so some experiments would never be checked
    experiment_list_filtered_by_hemisphere = {}
    exps_removed = []
    for area, exps in tqdm(experiment_list_inj_structs_removed.items()):
        exps_kept = []
        for e in exps:
            if check_hemisphere(e, area) == HEMISPHERE_TO_FILTER: exps_kept.append(e)
            else: exps_removed.append(e)
        experiment_list_filtered_by_hemisphere[area] = exps_kept
    print(len(exps_removed),"experiments removed:")
    print(exps_removed)
    # Saving to a file
    with open(connectivity_path / filename, 'wb') as f: pickle.dump(experiment_list_filtered_by_hemisphere, f)

In [None]:
# DELETE
# number of experiments before and after the hemisphere filter
print(sum(len(exps) for exps in experiment_list_inj_structs_removed.values()))
print(sum(len(exps) for exps in experiment_list_filtered_by_hemisphere.values()))

In [None]:
# Inspect the metadata row of a single experiment (first id listed for area 795 above)
df[df['id']==300076066]

In [None]:
# Area ids present in the hemisphere-filtered dictionary
print(experiment_list_filtered_by_hemisphere.keys())

In [None]:
# Number of areas (not experiments) in the hemisphere-filtered dictionary
print(len(experiment_list_filtered_by_hemisphere))

### Quality check for experiments with zero-valued projection metric in unionized data

In [None]:
# Load the unionized data of the hemisphere-filtered experiments unless it has already been read into memory.
# The decision must be based on the READ_UNIONIZED_DATA flag: the loader function itself is always truthy,
# so testing it would never load anything and leave area_experiment_unionized_data undefined.
# NOTE(review): when READ_UNIONIZED_DATA is True, the data in memory was read from the list *before* hemisphere filtering - confirm this is intended.
if not READ_UNIONIZED_DATA: area_experiment_unionized_data = load_unionized_data(experiment_list_filtered_by_hemisphere)

In [None]:
def _count_experiments(area_dict):
    """Total number of experiments over all areas of an {area: {experiment: ...}} dictionary."""
    return sum(len(experiments) for experiments in area_dict.values())

def _has_zero_projection(exp_df):
    """True if any row of the target structure has a projection metric of exactly zero."""
    target_rows = exp_df[exp_df['structure_id']==main_structure_id]
    return (target_rows[projection_metric] == 0).any()

print(f'number of experiments BEFORE QC = {_count_experiments(area_experiment_unionized_data)}')

# keep every area as a key, but only the experiments passing the check
area_experiment_unionized_data = {
    area: {exp: exp_df for exp, exp_df in experiments.items() if not _has_zero_projection(exp_df)}
    for area, experiments in area_experiment_unionized_data.items()
}

print(f'number of experiments AFTER QC = {_count_experiments(area_experiment_unionized_data)}')

In [None]:
# DELETE
# len(area_experiment_unionized_data.keys())
# t = area_experiment_unionized_data[223][171482142]
# t[t['structure_id']==533]



In [None]:
# DELETE
# area_experiment_unionized_data[223].keys()

### Apply <b>injection</b> volume thresholding to experiments

In [None]:
def get_vol_from_downloaded_unionized_data(area, experiment):
    """
    Returns the injection volume of an experiment within a given structure, in the hemisphere of injection.

    Despite its name, the function does not read the downloaded csv files: it queries the
    injection rows through RMAUnionizedData.

        Parameters:
            area (int): id of the injection structure.
            experiment (int): id of the experiment.

        Returns:
            volume: value of the 'volume' column in the row of the injection hemisphere. 0 is returned if there is no such row (same "no data" convention as get_hemisphere_from_z_coordinate), so the experiment fails any positive volume threshold instead of raising.
    """
    # Query data for injection structure and return volume of injection hemisphere
    temp_data = RMAUnionizedData(experiment_id=experiment,is_injection=True,select_structure_id=area).data.reset_index(drop=True)
    injection_hemisphere = get_hemisphere_from_z_coordinate(temp_data)
    temp_data = temp_data[temp_data['hemisphere_id']==injection_hemisphere]
    # .item() raises on an empty selection; treat "no injection row" as zero volume
    if temp_data.empty: return 0
    return temp_data['volume'].item()

In [None]:
print(f'number of experiments BEFORE injection volume thresholding = {sum(len(exps) for exps in area_experiment_unionized_data.values())}')

# Keep every area as a key, but only the experiments whose injection volume reaches the threshold
kept_after_injection_threshold = {}
for area in tqdm(area_experiment_unionized_data):
    kept_after_injection_threshold[area] = {
        exp: exp_df
        for exp, exp_df in area_experiment_unionized_data[area].items()
        if get_vol_from_downloaded_unionized_data(area, exp) >= INJECTION_VOLUME_THRESHOLD
    }
area_experiment_unionized_data = kept_after_injection_threshold

print(f'number of experiments AFTER injection volume thresholding = {sum(len(exps) for exps in area_experiment_unionized_data.values())}')

In [None]:
# DELETE
# number of areas (keys) left after injection volume thresholding
len(area_experiment_unionized_data.keys())

### Separate experiments by the difference in ipsilateral and contralateral projections to target area

In [None]:
# Split the experiments by the hemisphere in which the target structure receives the stronger projection,
# and collect them into two dictionaries

ipsilateral_projecting_exps = []
contralateral_projecting_exps = []
ipsilateral_dict = {}
contralateral_dict = {}

hem_ids = [2,1] # for getting the index of contralateral hemisphere to the one specified before

def _target_metric(exp_df, hemisphere_id):
    """Projection metric of the target structure in the given hemisphere."""
    target_rows = exp_df[(exp_df['hemisphere_id']==hemisphere_id) & (exp_df['structure_id']==main_structure_id)]
    return target_rows[projection_metric].item()

def _source_coordinates_with_target_metric(exp_df, area):
    """Joins, per hemisphere, the max-voxel coordinates of the source structure with the projection metric of the target structure."""
    source_coordinates = exp_df[exp_df['structure_id']==area][['hemisphere_id','max_voxel_x','max_voxel_y','max_voxel_z']]
    target_metric = exp_df[exp_df['structure_id']==main_structure_id][['hemisphere_id',projection_metric]]
    return source_coordinates.merge(target_metric, on='hemisphere_id')

for area in area_experiment_unionized_data:
    ipsilateral_dict[area] = {}
    contralateral_dict[area] = {}
    for exp, exp_df in area_experiment_unionized_data[area].items():
        # ipsilateral only if the metric is strictly higher in the previously selected hemisphere
        is_ipsilateral = _target_metric(exp_df, HEMISPHERE_TO_FILTER) > _target_metric(exp_df, hem_ids[HEMISPHERE_TO_FILTER-1])
        if is_ipsilateral:
            exp_ids, per_area = ipsilateral_projecting_exps, ipsilateral_dict
        else:
            exp_ids, per_area = contralateral_projecting_exps, contralateral_dict
        exp_ids.append(exp)
        per_area[area][exp] = _source_coordinates_with_target_metric(exp_df, area)

print(len(ipsilateral_projecting_exps),'ipsilaterally projecting experiments')
print(len(contralateral_projecting_exps),'contralaterally projecting experiments')

### Apply <b>projection</b> volume thresholding to experiments

In [None]:
# Column of the unionized data that is compared against PROJECTION_VOLUME_THRESHOLD
metric_for_thresholding = 'normalized_projection_volume'

In [None]:
# In ipsilateral experiments, thresholding is done on target structure in the same hemisphere as HEMISPHERE_TO_FILTER. In contralateral, the opposite.

def _passes_projection_threshold(area, exp, hemisphere_id):
    """
    True if the thresholding metric of the target structure in `hemisphere_id` reaches PROJECTION_VOLUME_THRESHOLD.

    The value is read from the full unionized data of the experiment: the frames stored in
    ipsilateral_dict / contralateral_dict only hold hemisphere_id, the max-voxel coordinates and
    the projection metric, so they contain neither 'structure_id' nor the thresholding metric.
    """
    full_df = area_experiment_unionized_data[area][exp]
    target_rows = full_df[(full_df['hemisphere_id']==hemisphere_id) & (full_df['structure_id']==main_structure_id)]
    return target_rows[metric_for_thresholding].item() >= PROJECTION_VOLUME_THRESHOLD

print(f'number of ipsilateral experiments BEFORE projection volume thresholding = {sum(len(ipsilateral_dict[area]) for area in ipsilateral_dict.keys())}')
print(f'number of contralateral experiments BEFORE projection volume thresholding = {sum(len(contralateral_dict[area]) for area in contralateral_dict.keys())}')

temp_ipsilateral_dict = {}
temp_contralateral_dict = {}

# .keys() has to be called: passing the bound method itself to tqdm is not iterable
for area in tqdm(ipsilateral_dict.keys(), 'ipsilateral'):
    temp_ipsilateral_dict[area] = {}
    for exp, exp_df in ipsilateral_dict[area].items():
        if _passes_projection_threshold(area, exp, HEMISPHERE_TO_FILTER):
            temp_ipsilateral_dict[area][exp] = exp_df

for area in tqdm(contralateral_dict.keys(), 'contralateral'):
    temp_contralateral_dict[area] = {}
    for exp, exp_df in contralateral_dict[area].items():
        if _passes_projection_threshold(area, exp, hem_ids[HEMISPHERE_TO_FILTER-1]):
            temp_contralateral_dict[area][exp] = exp_df

ipsilateral_dict = temp_ipsilateral_dict
contralateral_dict = temp_contralateral_dict

print(f'number of ipsilateral experiments AFTER projection volume thresholding = {sum(len(ipsilateral_dict[area]) for area in ipsilateral_dict.keys())}')
print(f'number of contralateral experiments AFTER projection volume thresholding = {sum(len(contralateral_dict[area]) for area in contralateral_dict.keys())}')

### Display mouse lines of ipsilateral and contralateral experiments

In [None]:
def _transgenic_line(exp_id):
    """Returns the 'transgenic-line' entry of the metadata row of experiment `exp_id`."""
    return df.loc[df['id']==exp_id, 'transgenic-line'].item()

# NOTE(review): the *_projecting_exps lists are filled before projection volume thresholding,
# so they may include experiments that were dropped afterwards - confirm this is intended.
print("Ipsilateral mouse lines")
ipsilateral_lines = [_transgenic_line(x) for x in ipsilateral_projecting_exps]
print(ipsilateral_lines)

print("Contralateral mouse lines")
contralateral_lines = [_transgenic_line(x) for x in contralateral_projecting_exps]
print(contralateral_lines)

### For every brain area compute average projection metric and use it to compute weighted centroid

In [None]:
def xyz_weighted_centroid(coordinates):
    """
    Returns xyz coordinates of the weighted centroid of a set of vertices, together with the average weight. Computed to determine central coordinate within a brain region, weighted by projection metric of each experiment injected in that region.
    
    cx = (v1x*m1 + v2x*m2 + ... vnx*mn) / (m1 + m2 .... mn) 
    cy = (v1y*m1 + v2y*m2 + ... vny*mn) / (m1 + m2 .... mn)
    cz = (v1z*m1 + v2z*m2 + ... vnz*mn) / (m1 + m2 .... mn)
    
    where v1x, v1y and v1z are xyz coordinates of vertex 1 and m1 is its weight.
    
        Parameters:
            coordinates (List): list of nested lists containing coordinates and weight of each vertex [[x1, y1, z1, w1], [x2, y2, z2, w2], ...].
        
        Returns:
            centroid_point (List): location of centroid (each coordinate truncated to int) and related average value of projection metric in the form of [x, y, z, avg_projection]. None is returned if there are no vertices or if the weights sum to zero, because the centroid is undefined in both cases.
    """
    if len(coordinates) == 0: return None
    total_weight = sum(vertex[3] for vertex in coordinates)
    # with a zero total weight the divisions below would fail
    if total_weight == 0: return None

    centroid_point = [int(sum(vertex[axis]*vertex[3] for vertex in coordinates) / total_weight) for axis in range(3)]
    centroid_point.append(total_weight / len(coordinates))
    
    return centroid_point

In [None]:
# DELETE
# area_experiment_unionized_data[223]

In [None]:
# DELETE
# l = [223, 266, 914, 207, 1049, 609, 35, 1009, 38, 1105, 59, 390, 591, 262, 872, 1061, 287, 30, 564, 604, 238, 619, 689, 88, 880, 614, 422, 255, 576073699, 549009223, 333, 27, 131, 483, 583, 23, 706, 677, 1037, 133, 484682470, 966, 1025, 151, 763, 280, 369, 830, 765, 231, 356, 839, 549009215, 206, 612, 169, 898, 757, 66, 968, 484682508, 182305689, 621, 210, 177, 374, 589508451, 639, 263, 982, 178, 534, 136, 75, 607344830, 106, 975, 222, 970, 1027, 347, 1031, 1077, 773, 15, 12, 574, 549009219, 10671, 581, 127, 1113, 589508447, 7, 100, 470, 181, 203, 149, 162, 350, 358, 1041, 998, 96, 72, 235, 576073704, 398, 560581559, 1018, 246, 599626927, 147, 118, 549009227, 576, 1044, 115, 629, 318, 298, 566, 58, 312782574, 271, 922, 1039, 491, 272, 63, 741, 135, 1109, 126, 515, 907, 173, 718, 725, 295, 146, 230, 414, 616, 606826663, 580, 1029, 563807435, 460, 445, 310, 560581563, 693, 642, 423, 749, 951, 292, 226, 653, 531, 599, 475, 225, 1052, 1098, 364, 197, 563807439, 19, 286, 957, 859, 814, 523, 1069, 325, 372, 250, 628, 1020, 186, 944, 366, 980, 679, 589, 332, 874, 846, 936, 788, 781, 952, 589508455, 780, 307, 549009211, 452, 903, 1, 1004, 946, 1107, 189, 711, 1120, 214, 1057, 525, 912, 338, 217, 533, 989, 575, 634, 599626923, 930, 1126, 83, 64, 209, 321, 381]
# for a in l: print(a,contralateral_dict[a])
# 1041 the only in ipsilateral
# contralateral has more e.g. 262

In [None]:
hem_ids = [2,1] # for getting the index of contralateral hemisphere to the one specified before
ipsilateral_centroids_dict = {}
contralateral_centroids_dict = {}

def _vertices_in_hemisphere(experiment_frames, hemisphere_id):
    """Returns [x, y, z, projection metric] of the row with the given hemisphere_id, for every experiment frame."""
    vertex_columns = ('max_voxel_x', 'max_voxel_y', 'max_voxel_z', projection_metric)
    vertices = []
    for exp in experiment_frames:
        hemisphere_row = exp[exp['hemisphere_id']==hemisphere_id]
        vertices.append([hemisphere_row[column].item() for column in vertex_columns])
    return vertices

# ipsilateral experiments: use the rows of the injected hemisphere
for area, experiments in ipsilateral_dict.items():
    coordinates = _vertices_in_hemisphere(experiments.values(), HEMISPHERE_TO_FILTER)
    centroid_xyz = xyz_weighted_centroid(coordinates) if len(coordinates) != 0 else None
    if centroid_xyz: ipsilateral_centroids_dict[area] = centroid_xyz
print(len(ipsilateral_centroids_dict.keys()),'ipsilateral centroids computed out of',str(len(ipsilateral_dict.keys())),'regions')

# contralateral experiments: use the rows of the opposite hemisphere
for area, experiments in contralateral_dict.items():
    coordinates = _vertices_in_hemisphere(experiments.values(), hem_ids[HEMISPHERE_TO_FILTER-1])
    centroid_xyz = xyz_weighted_centroid(coordinates) if len(coordinates) != 0 else None
    if centroid_xyz: contralateral_centroids_dict[area] = centroid_xyz
print(len(contralateral_centroids_dict.keys()),'contralateral centroids computed out of',str(len(contralateral_dict.keys())),'regions')

### Saving computed centroids

In [None]:
# DELETE
# number of areas for which an ipsilateral centroid was computed
len(ipsilateral_centroids_dict.keys())

In [None]:
# {area_id: [x, y, z, avg_projection]} as returned by xyz_weighted_centroid
ipsilateral_centroids_dict

In [None]:
# Output folder named after the projection metric, hemisphere, injection volume threshold and target structure
# NOTE(review): PROJECTION_VOLUME_THRESHOLD is not part of the name, so runs differing only in that threshold share one folder - confirm this is intended
foldername = f'centroids_{projection_metric}_hem_id_{HEMISPHERE_TO_FILTER}_inj_vol_thresh_{INJECTION_VOLUME_THRESHOLD}_{main_structure}'
folderpath = connectivity_path / foldername
mkdir(folderpath)

In [None]:
# Write both centroid dictionaries into the output folder, one pickle file each
centroids_to_save = (
    ('ipsilateral', ipsilateral_centroids_dict),
    ('contralateral', contralateral_centroids_dict),
)
for side, centroids in centroids_to_save:
    filename = f'{side}_centroids_dict_hem_{HEMISPHERE_TO_FILTER}_inj_vol_thresh_{INJECTION_VOLUME_THRESHOLD}_{main_structure}.pkl'
    with open(folderpath / filename, 'wb') as f:
        pickle.dump(centroids, f)

In [None]:
# Reading from a file
filename = f'ipsilateral_centroids_dict_hem_{HEMISPHERE_TO_FILTER}_inj_vol_thresh_{INJECTION_VOLUME_THRESHOLD}_{main_structure}.pkl'
with open(folderpath / filename, 'rb') as f: ipsilateral_centroids_dict = pickle.load(f)
filename = f'contralateral_centroids_dict_hem_{HEMISPHERE_TO_FILTER}_inj_vol_thresh_{INJECTION_VOLUME_THRESHOLD}_{main_structure}.pkl'
with open(folderpath / filename, 'rb') as f: contralateral_centroids_dict = pickle.load(f)