In [7]:
import nrrd
import pandas as pd
import numpy as np
import pickle
import os, ast, re

import json
from voxcell import RegionMap
import multiprocessing as mp

# Use annotation-hierarchy-generated densities
Use both the new hierarchy JSON and the evolved CCFv3a annotation volume to produce average regional cell-type densities.

In [2]:
# Base directory of the density-calculation results. It ends with '/' because
# some paths below are built by plain string concatenation.
root_folder = '/gpfs/bbp.cscs.ch/data/project/proj84/csaba/aibs_10x_mouse_wholebrain/results/density_calculations/'
# Sub-folder where the summed ("total") density NRRD volumes are written and later read back.
results_folder = f'{root_folder}total_nrrd/'

In [3]:
# Parse the textual form of a Python literal (e.g. "[1, 2, 3]") back into an object
def convert_to_list(x):
    """Evaluate ``x`` as a Python literal, falling back to ``x`` itself.

    Args:
        x: Value to parse, typically the string representation of a list.

    Returns:
        The object produced by ``ast.literal_eval(x)``; if that raises
        ``ValueError`` or ``SyntaxError`` the input is returned unchanged.
    """
    try:
        parsed = ast.literal_eval(x)
    except (ValueError, SyntaxError):
        # Not a valid literal: hand the value back untouched.
        return x
    return parsed


# Load the region-hierarchy table from CSV. Despite its name, `save_file_here`
# is only read in this cell. Per the preview below, each row holds a region's
# id, acronym, name, voxel count, volume (mm3), child ids and child acronyms.
save_file_here = '/gpfs/bbp.cscs.ch/data/project/proj84/csaba/aibs_10x_mouse_wholebrain/results/df_hierarchy_ccfv3_l23split_barrelsplit.csv'
merged_df = pd.read_csv(save_file_here, index_col='Unnamed: 0')

# CSV stores the 'children' lists as text; parse them back into Python lists.
# Cells that cannot be parsed are left as they are (see convert_to_list).
if 'children' in merged_df.columns:
    merged_df['children'] = merged_df['children'].apply(convert_to_list)


In [4]:
merged_df.head(3)

Unnamed: 0,id,acronym,name,vox_count25,vol_mm3,children,acr_list
0,997,root,root,32750610.0,511.728281,"[304325711, 1811993763, 484682512, 2500193001,...","['retina', 'root_O', 'scwm', 'fiber tracts_O',..."
1,8,grey,Basic cell groups and regions,29153428.0,455.522313,"[3092369320, 3101970431, 1144, 1145, 1143, 989...","['CB_O', 'MB_O', 'CBXmo', 'CBXpu', 'CBXgr', 'F..."
2,567,CH,Cerebrum,17629726.0,275.464469,"[16, 583, 131, 780, 2416897036, 3034756217, 21...","['6b', 'CLA', 'LA', 'PA', 'CTXsp_O', 'STR_O', ..."


Next, we generate total density volumes so that densities can be calculated for every region listed in the hierarchy JSON file:

# Create NRRD files
Using only the globally scaled ABC density data:

In [5]:
%%time 

# Per-cell metadata table with cluster annotations.
meta_path = "/gpfs/bbp.cscs.ch/data/project/proj84/csaba/aibs_10x_mouse_wholebrain/metadata/WMB-10X/20231215/views/cell_metadata_with_cluster_annotation.csv"
# Only the taxonomy columns are loaded; 'class' and 'cluster' are the ones used below.
columns_to_read = ['class', 'subclass', 'cluster']
# Previous variant that loaded every column:
#metadata = pd.read_csv(meta_path, dtype={'cell_label':str}, low_memory=False)
metadata = pd.read_csv(meta_path, usecols=columns_to_read, )

# 'class' labels treated as neuronal (used below to select `neurontypes`).
# This list is the union of exc_classes, inh_classes and other_classes.
n_classes = ['01 IT-ET Glut', '02 NP-CT-L6b Glut', '03 OB-CR Glut',
       '04 DG-IMN Glut', '05 OB-IMN GABA', '06 CTX-CGE GABA',
       '07 CTX-MGE GABA', '08 CNU-MGE GABA', '09 CNU-LGE GABA',
       '10 LSX GABA', '11 CNU-HYa GABA', '12 HY GABA', '13 CNU-HYa Glut',
       '14 HY Glut', '15 HY Gnrh1 Glut', '16 HY MM Glut', '17 MH-LH Glut',
       '18 TH Glut', '19 MB Glut', '20 MB GABA', '21 MB Dopa',
       '22 MB-HB Sero', '23 P Glut', '24 MY Glut', '25 Pineal Glut',
       '26 P GABA', '27 MY GABA', '28 CB GABA', '29 CB Glut',]

# 'class' labels treated as non-neuronal (used below to select `nonneurontypes`).
nn_classes = ['30 Astro-Epen', '31 OPC-Oligo', '32 OEC', '33 Vascular',
       '34 Immune']

# Neuronal classes counted as excitatory: the "Glut" labels.
exc_classes = ['01 IT-ET Glut', '02 NP-CT-L6b Glut', '03 OB-CR Glut',
      '04 DG-IMN Glut', '13 CNU-HYa Glut', '14 HY Glut', '15 HY Gnrh1 Glut', '16 HY MM Glut', '17 MH-LH Glut',
      '18 TH Glut', '19 MB Glut', '23 P Glut', '24 MY Glut', '25 Pineal Glut', '29 CB Glut',]
# Neuronal classes counted as inhibitory: the "GABA" labels.
inh_classes = ['05 OB-IMN GABA', '06 CTX-CGE GABA', '07 CTX-MGE GABA', '08 CNU-MGE GABA', '09 CNU-LGE GABA',
      '10 LSX GABA', '11 CNU-HYa GABA', '12 HY GABA', '20 MB GABA', '26 P GABA', '27 MY GABA', '28 CB GABA', ]
# Neuronal classes in neither list above ("Dopa" and "Sero" labels).
other_classes = ['21 MB Dopa', '22 MB-HB Sero', ]
# Excitatory + inhibitory classes; note that other_classes is NOT part of this sum.
exci_inhib_sum_classes = exc_classes + inh_classes

# Glial groups are defined at 'cluster' level (not 'class' level) by explicit label lists.
# Cluster labels grouped as astrocyte types (Bergmann and Astro-* labels).
astrotypes_clusters = ['5206 Bergmann NN_1', '5207 Astro-CB NN_1', '5208 Astro-NT NN_1',
       '5209 Astro-NT NN_1', '5210 Astro-NT NN_1', '5211 Astro-NT NN_1',
       '5212 Astro-NT NN_1', '5213 Astro-NT NN_1', '5214 Astro-NT NN_2',
       '5215 Astro-NT NN_2', '5216 Astro-NT NN_2', '5217 Astro-NT NN_2',
       '5218 Astro-TE NN_1', '5219 Astro-TE NN_1', '5220 Astro-TE NN_1',
       '5221 Astro-TE NN_1', '5222 Astro-TE NN_2', '5223 Astro-TE NN_2',
       '5224 Astro-TE NN_3', '5225 Astro-TE NN_3', '5226 Astro-TE NN_3',
       '5227 Astro-TE NN_3', '5228 Astro-TE NN_4', '5229 Astro-TE NN_5',
       '5230 Astro-TE NN_5', '5231 Astro-OLF NN_1', '5232 Astro-OLF NN_1',
       '5233 Astro-OLF NN_2', '5234 Astro-OLF NN_2',
       '5235 Astro-OLF NN_3', '5236 Astro-OLF NN_3',]

# Single cluster label grouped as microglia.
microglia_clusters = ['5312 Microglia NN_1']

# Cluster labels grouped under "oligos" (OPC, COP, NFOL, MFOL and MOL labels).
oligos_clusters = [ '5266 OPC NN_1', '5267 OPC NN_1',
       '5268 OPC NN_1', '5269 OPC NN_1', '5270 OPC NN_1', '5271 OPC NN_2',
       '5272 COP NN_1', '5273 COP NN_1', '5274 COP NN_1', '5275 COP NN_1',
       '5276 COP NN_1', '5277 COP NN_1', '5278 NFOL NN_2',
       '5279 NFOL NN_2', '5280 NFOL NN_2', '5281 NFOL NN_2',
       '5282 MFOL NN_3', '5283 MFOL NN_3', '5284 MOL NN_4',
       '5285 MOL NN_4', '5286 MOL NN_4', '5287 MOL NN_4', '5288 MOL NN_4',]

# "Glia" here means the three cluster groups above concatenated.
glia_clusters = astrotypes_clusters + microglia_clusters + oligos_clusters

def _unique_clusters_for(class_names):
    """Return the sorted unique 'cluster' labels of cells whose 'class' is in ``class_names``."""
    in_group = metadata['class'].isin(class_names)
    return np.unique(metadata.loc[in_group, 'cluster'].values)


# Cluster labels belonging to each class group defined above
neurontypes = _unique_clusters_for(n_classes)
nonneurontypes = _unique_clusters_for(nn_classes)
exctypes = _unique_clusters_for(exc_classes)
inhtypes = _unique_clusters_for(inh_classes)
othertypes = _unique_clusters_for(other_classes)
exci_inhib_sum = _unique_clusters_for(exci_inhib_sum_classes)
# Every cluster label present in the metadata, regardless of class
celltypes = np.unique(metadata['cluster'].values)

CPU times: user 16.2 s, sys: 436 ms, total: 16.7 s
Wall time: 16.9 s


In [10]:
# Parcellation membership table. Later cells use its 'cluster_as_filename'
# column to look up a region name and its 'parcellation_label' column to
# extract the numeric annotation id (trailing digits of the label).
file = '/gpfs/bbp.cscs.ch/data/project/proj84/csaba/aibs_10x_mouse_wholebrain/metadata/parcellation_to_parcellation_term_membership_extend.csv'
parcellation_annotation = pd.read_csv(file)

# Annotation volume: a 3-D array of region ids. The NRRD header is discarded.
# NOTE(review): "25" in the file name presumably means 25 um resolution - confirm.
annotation_volume_folder = "/gpfs/bbp.cscs.ch/project/proj84/piluso/share/general/warped_augmented_CCFv3/"
CCFv3, _ = nrrd.read(f'{annotation_volume_folder}annotation_25_2022_CCFv3a.nrrd')
# Alternative annotation volume (layer 2/3 and barrel split), currently disabled:
# CCFv3, _ = nrrd.read("/gpfs/bbp.cscs.ch/data/project/proj84/atlas_pipeline_runs/2024-05-15T22:44:26+02:00/annotation_ccfv3_l23split_barrelsplit_validated.nrrd")

In [6]:
%%time

# Pickle with the globally scaled density tables (note: `file` is re-used from
# the parcellation cell and now points at this pickle).
file = os.path.join( root_folder, 'scaled_densities_global_only.pickle' )

# Load the pickled, dict-like collection of scaled density tables.
# NOTE(review): the original comment said these are "region id volumes from
# volume_calc_from_template.ipynb"; the file name suggests scaled densities - confirm.
# SECURITY: pickle.load executes arbitrary code from the file; only use trusted files.
with open(file, 'rb') as pickle_file:
    scaled_combined_result_dataframes = pickle.load(pickle_file)
print("Loaded pickle file.")

# Make the project's helper scripts importable.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
import sys
sys.path.append('/gpfs/bbp.cscs.ch/data/project/proj84/csaba/aibs_10x_mouse_wholebrain/notebooks/scripts/')

from helper_functions import create_combined_dataframe
# Earlier pipeline steps that produced the pickled input (kept for reference):
# #Create a dict of df, each containing a cell type's occurrence in all regions and its densities in all regions
# result_dataframes = read_and_concat_csv_files_new(csv_filenames, unique_prefixes, folder_path) #[region]celltypes
# combined_result_dataframes = combine_rows_and_calculate_average(result_dataframes)
# Re-key the tables by cell type. Later cells iterate this as a dict of
# DataFrames indexed by region name with a 'density_mm3' column.
shuffled_combined_dataframes = create_combined_dataframe(scaled_combined_result_dataframes) #[celltypes]region


Loaded pickle file.
CPU times: user 37.1 s, sys: 98.6 ms, total: 37.2 s
Wall time: 39.8 s


In [24]:
len(scaled_combined_result_dataframes.keys())

703

In [11]:
def nrrd_for_validation(df, parcellation_annotation, CCFv3, dtype=np.float64):
    """Paint per-region densities onto the annotation volume.

    Every voxel of ``CCFv3`` is replaced by the density of the region its
    annotation id belongs to. Voxels whose id is not covered by ``df``, and
    voxels with id 0 (outside of the brain), get a density of 0.

    The previous implementation wrote the densities into a copy of ``CCFv3``
    and therefore inherited its dtype; with an integer annotation volume
    (NOTE(review): typical for annotation files - confirm for this one) the
    fractional part of every density was silently truncated. The result is
    now a floating-point array. The per-id full-volume scans were also
    replaced by a single ``np.unique`` pass plus a lookup table.

    Args:
        df: DataFrame indexed by region name with a 'density_mm3' column.
        parcellation_annotation: DataFrame with a 'cluster_as_filename' column
            (matched against the region names of ``df``) and a
            'parcellation_label' column whose values end in the numeric
            annotation id.
        CCFv3: Array of annotation ids. It is not modified.
        dtype: dtype of the returned volume (default ``np.float64``).

    Returns:
        Array with the same shape as ``CCFv3`` holding one density per voxel.

    Raises:
        AttributeError: If a 'parcellation_label' does not end in digits.
    """
    # Map each annotation id to the density of its region. If an id is listed
    # for several regions the last one wins, as in the original row-by-row fill.
    id_to_density = {}
    for regionname in df.index.values:
        density = df.loc[regionname, 'density_mm3']
        annotation_id_info = parcellation_annotation[parcellation_annotation['cluster_as_filename'] == regionname]
        for label in annotation_id_info['parcellation_label'].values:
            id_to_density[int(re.search(r'\d+$', label).group())] = density

    # Outside of the brain (id 0) is always 0, whatever the tables say.
    id_to_density[0] = 0

    annotation = np.asarray(CCFv3)
    # One pass over the volume: `inverse` gives, for every voxel, the position
    # of its id in `unique_ids`, so the density volume is a table lookup.
    unique_ids, inverse = np.unique(annotation, return_inverse=True)
    # Ids we have no information about default to 0.
    lookup = np.array(
        [id_to_density.get(uid.item(), 0) for uid in unique_ids],
        dtype=dtype,
    )
    # ravel + reshape keeps this independent of the shape NumPy gives `inverse`.
    return lookup[inverse.ravel()].reshape(annotation.shape)

In [12]:
%%time

def process_type(types, file_name):
    """Build the summed density volume for one group of cell types.

    Args:
        types: Collection of cell-type keys to include; keys of
            ``shuffled_combined_dataframes`` not in it are ignored.
        file_name: Label passed through unchanged so the caller knows which
            output the volume belongs to.

    Returns:
        Tuple ``(volume, file_name)`` where ``volume`` is the result of
        ``nrrd_for_validation`` applied to the per-region sums.
    """
    # Keep only the per-cell-type tables that belong to this group
    selected = [
        frame
        for celltype, frame in shuffled_combined_dataframes.items()
        if celltype in types
    ]

    # Stack them and add up the rows that share a region (index value)
    stacked = pd.concat(selected)
    per_region_total = stacked.groupby(stacked.index).sum()

    # Turn the per-region totals into a volume on the annotation grid
    volume = nrrd_for_validation(per_region_total, parcellation_annotation, CCFv3)
    return volume, file_name

def main():
    """Compute every summed density volume in parallel and write it as NRRD.

    Each task pairs a group of cell types with the output file stem; the
    volumes are computed by ``process_type`` in worker processes and then
    written one after another into ``results_folder``.
    """
    # (cell-type group, output file stem). The stems are also used by the
    # loading cell further down, so their spelling must not change.
    tasks = [
        (neurontypes, "globalonly_scaled_total_neuron_densities"),
        (nonneurontypes, "globalonly_scaled_total_nonneuron_densities"),
        (exctypes, "globalonly_scaled_total_excitatory_densities"),
        (inhtypes, "globalonly_scaled_total_inhibitory_densities"),
        (astrotypes_clusters, "globalonly_scaled_total_astrotypes_densities"),
        (microglia_clusters, "globalonly_scaled_total_microglia_densities"),
        (oligos_clusters, "globalonly_scaled_total_oligocyte_densities"),
        (glia_clusters, "globalonly_scaled_total_glia_densities"),
        (exci_inhib_sum, "globalonly_scaled_total_excinh_densities"),
        (celltypes, "globalonly_scaled_total_celltypes_densities"),
    ]

    # Make sure the output folder exists BEFORE the long computation, so a
    # missing directory cannot throw away the results at write time.
    os.makedirs(results_folder, exist_ok=True)

    # There is no point in starting more workers than there are tasks.
    with mp.Pool(processes=min(len(tasks), mp.cpu_count())) as pool:
        results = pool.starmap(process_type, tasks)

    # Sequentially write the .nrrd files to avoid concurrent writes
    for result, file_name in results:
        print(file_name)
        nrrd.write(os.path.join(results_folder, f"{file_name}.nrrd"), result)

if __name__ == "__main__":
    main()

globalonly_scaled_total_neuron_densities
globalonly_scaled_total_nonneuron_densities
globalonly_scaled_total_excitatory_densities
globalonly_scaled_total_inhibitory_densities
globalonly_scaled_total_astrotypes_densities
globalonly_scaled_total_microglia_densities
globalonly_scaled_total_oligocyte_densities
globalonly_scaled_total_glia_densities
globalonly_scaled_total_excinh_densities
globalonly_scaled_total_celltypes_densities
CPU times: user 1min 6s, sys: 5.58 s, total: 1min 12s
Wall time: 13min 40s


In [13]:
def _load_total_density(group):
    """Read one summed density volume from ``results_folder``.

    Args:
        group: Middle part of the file name, i.e. the file read is
            ``globalonly_scaled_total_<group>_densities.nrrd``.

    Returns:
        The volume stored in the file (the NRRD header is discarded).

    Raises:
        FileNotFoundError: If the file does not exist. Previously a missing
            file was skipped silently, which surfaced later as a NameError or,
            worse, as a stale variable from an earlier run being used.
    """
    full_path = os.path.join(results_folder, f"globalonly_scaled_total_{group}_densities.nrrd")
    if not os.path.isfile(full_path):
        raise FileNotFoundError(f"Density volume not found: {full_path}")
    volume, _header = nrrd.read(full_path)
    return volume


# Load the volumes written by main(); the group names match its file stems.
exc = _load_total_density("excitatory")
inh = _load_total_density("inhibitory")
glia = _load_total_density("glia")
neuron = _load_total_density("neuron")
astro = _load_total_density("astrotypes")
oligo = _load_total_density("oligocyte")
nonneuron = _load_total_density("nonneuron")
microglia = _load_total_density("microglia")

# Sanity check: all volumes should share one shape
exc.shape, inh.shape, glia.shape, neuron.shape, astro.shape, oligo.shape, nonneuron.shape, microglia.shape

((566, 320, 456),
 (566, 320, 456),
 (566, 320, 456),
 (566, 320, 456),
 (566, 320, 456),
 (566, 320, 456),
 (566, 320, 456),
 (566, 320, 456))

In [14]:
# Spot check: mean neuron density over the voxels annotated with id 2646114338.
mask = (CCFv3 == 2646114338)

# NOTE(review): the recorded output is nan together with a NumPy warning, which
# is what the mean of an empty selection looks like - presumably no voxel of
# the loaded annotation volume carries exactly this id; confirm.
neuron[mask].mean()

  neuron[mask].mean()
  ret = ret.dtype.type(ret / rcount)


nan

In [None]:
%%time

# Arrays to process: output column name -> density volume loaded above.
# process_row reads this dict as a global and emits one mean per entry.
arrays = {
    'neurons_mm3': neuron,
    'inh_mm3': inh,
    'exc_mm3': exc,
    'glia_mm3': glia,
    'astro_mm3': astro,
    'oligo_mm3': oligo,
    'microglia_mm3': microglia, 
    'nonneuron_mm3': nonneuron
}

# Worker function to process each row
def process_row(row):
    """Average every density volume over one region and all of its children.

    Reads the module-level ``CCFv3`` annotation volume and the ``arrays``
    dict (output column name -> density volume).

    Args:
        row: Mapping or Series with the keys 'id', 'acronym', 'name' and
            'children' (an iterable of child region ids). It is not modified.

    Returns:
        Dict with 'id', 'acronym', 'name' and one entry per key of ``arrays``
        holding the mean over the region's voxels, or ``None`` when the
        region has no voxel in ``CCFv3``.

    Raises:
        TypeError: If ``row['children']`` is not an iterable of ids (e.g. a
            string that could not be parsed into a list).
    """
    id_ = row['id']
    acr = row['acronym']
    name = row['name']

    children = row['children']
    if isinstance(children, (str, bytes)) or not hasattr(children, '__iter__'):
        raise TypeError(
            f"'children' of region {id_} must be an iterable of ids, got {type(children).__name__}"
        )
    # Build a fresh list: appending to row['children'] would modify the
    # caller's data and make repeated calls accumulate the region's own id.
    ids = list(children) + [id_]

    # Voxels belonging to the region itself or to any of its children
    mask = np.isin(CCFv3, ids)

    # Mean per volume; None marks regions that are not present in CCFv3
    mean_values = {}
    for column, arr in arrays.items():
        selected = arr[mask]  # select once, then test and average
        mean_values[column] = selected.mean() if selected.size > 0 else None

    # Construct the result dictionary
    return {
        'id': id_,
        'acronym': acr,
        'name': name,
        **mean_values
    }

# Hand every hierarchy row to a pool of worker processes and collect the per-region means
if __name__ == '__main__':
    with mp.Pool(processes=mp.cpu_count()) as pool:
        new_data = pool.map(
            process_row,
            (record for _index, record in merged_df.iterrows()),
        )

    # One dict per region -> table indexed by region id
    new_data_df = pd.DataFrame(new_data).set_index('id')

new_data_df.shape

In [13]:
# Persist the per-region mean densities (indexed by region id) next to the other results.
new_data_df.to_csv(f"{root_folder}globalonly_scaled_mean_tr_densities.csv")

In [28]:
new_data_df[new_data_df.index.isin([2646114338, 1060511842, 3412423041	])]

Unnamed: 0_level_0,acronym,name,neurons_mm3,inh_mm3,exc_mm3,glia_mm3,astro_mm3,oligo_mm3,microglia_mm3,nonneuron_mm3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2646114338,FRP2,"Frontal pole, layer 2",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3412423041,MOs2,"Secondary motor area, layer 2",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1060511842,sup_O,supraoptic commissures: Other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
